コード例 #1
0
def _creating_specified_csv(word_input, source_ngram_tsv, csv_writer):
    """Copy n-gram rows whose first word lemmatises to ``word_input``.

    Each line of ``source_ngram_tsv`` is stripped of trailing whitespace,
    has its tabs turned into spaces and is split on single spaces. The
    first field, cut at its first underscore, is normalised with pymorphy2;
    when the normal form equals ``word_input`` the split fields are written
    as one row through ``csv_writer``.

    Progress is reported after every line via
    ``ngram_tools.updating_output_row``.

    Args:
        word_input: Normal form to match against the first word of a line.
        source_ngram_tsv: Iterable of text lines (presumably an open
            n-gram TSV file; verify against the caller).
        csv_writer: Object exposing ``writerow`` (presumably a
            ``csv.writer``; verify against the caller).
    """
    analyzer = pymorphy2.MorphAnalyzer()
    written = 0
    for checked, raw_line in enumerate(source_ngram_tsv, 1):
        # Tabs and single spaces are both treated as field separators.
        fields = raw_line.rstrip().replace('\t', ' ').split(' ')
        # Keep only the part before the first underscore
        # (presumably drops a "_TAG" suffix; TODO confirm input format).
        head_word = fields[0].split('_')[0]
        lemma = analyzer.parse(head_word)[0].normal_form
        if word_input == lemma:
            csv_writer.writerow(fields)
            written += 1
        ngram_tools.updating_output_row(
            "Creating file process: " + str(checked) + " lines of tsv file checked and "
            + str(written) + " lines were written in csv file.")
コード例 #2
0
def _creating_frequency_dict(csv_reader):
    """Sum occurrence counts of noun/pronoun second words from CSV rows.

    For every row, the second field (cut at its first underscore) is parsed
    with pymorphy2. When the first parse is a noun (``NOUN``) or a
    noun-pronoun (``NPRO``) in the accusative, ablative (instrumental) or
    nominative case, the integer in the fourth field is added to that
    word's running total.

    Progress is reported after every row via
    ``ngram_tools.updating_output_row``.

    Args:
        csv_reader: Iterable of rows that can be indexed at positions 1 and
            3 (presumably a ``csv.reader``; verify against the caller).

    Returns:
        dict mapping each matching word (as written in the row, not
        lemmatised) to the sum of its counts.

    Raises:
        ValueError: If the fourth field of a matching row is not an integer.
        IndexError: If a row has fewer than four fields.
    """
    morph = pymorphy2.MorphAnalyzer()
    # Grammeme combinations that qualify a word; built once, outside the loop.
    accepted_grammemes = (
        {'NOUN', 'accs'}, {'NOUN', 'ablt'}, {'NOUN', 'nomn'},
        {'NPRO', 'accs'}, {'NPRO', 'ablt'}, {'NPRO', 'nomn'},
    )
    counter = 0
    counter_of_objects = 0
    dict_of_objects = {}
    for line in csv_reader:
        counter += 1
        count_of_words = line[3]
        second_word_without_tag = line[1].split('_')[0]
        second_word_parsed = morph.parse(second_word_without_tag)[0]
        tag = second_word_parsed.tag
        if any(grammemes in tag for grammemes in accepted_grammemes):
            if second_word_without_tag not in dict_of_objects:
                counter_of_objects += 1
            # Start from 0 so the first row contributes its real count
            # (previously the first occurrence was recorded as 1).
            dict_of_objects[second_word_without_tag] = (
                dict_of_objects.get(second_word_without_tag, 0)
                + int(count_of_words)
            )
        ngram_tools.updating_output_row('Dictionary creation process: ' + str(counter) + ' lines parsed and ' +
                                        str(counter_of_objects) + ' objects were specified' )
    return dict_of_objects