def _creating_specified_csv(word_input, source_ngram_tsv, csv_writer):
    """Copy n-gram rows whose first word lemmatizes to ``word_input``.

    Each line of ``source_ngram_tsv`` is stripped of trailing whitespace,
    its tabs are replaced with spaces and it is split on single spaces.
    The part of the first field before its first underscore is normalized
    with pymorphy2 (first parse variant); when that normal form equals
    ``word_input`` the split fields are written to ``csv_writer`` as a row.

    Args:
        word_input: Normal form that the first word must match.
        source_ngram_tsv: Iterable yielding the text lines to scan.
        csv_writer: Writer object exposing ``writerows``.

    Progress is reported through ``ngram_tools.updating_output_row``.
    """
    progress_template = (
        "Creating file process: {} lines of tsv file checked and "
        "{} lines were written in csv file."
    )
    analyzer = pymorphy2.MorphAnalyzer()
    written = 0

    for checked, raw_line in enumerate(source_ngram_tsv, 1):
        # Tabs and spaces are both treated as field separators.
        fields = raw_line.rstrip().replace('\t', ' ').split(' ')

        # Drop everything from the first underscore on before lemmatizing.
        bare_word = fields[0].split('_')[0]
        lemma = analyzer.parse(bare_word)[0].normal_form

        if lemma == word_input:
            csv_writer.writerows([fields])
            written += 1

        # NOTE(review): reported once per input line; the original nesting
        # of this call was lost in formatting -- confirm.
        ngram_tools.updating_output_row(
            progress_template.format(checked, written)
        )
def _creating_frequency_dict(csv_reader):
    """Sum n-gram counts per second word for noun/pronoun candidates.

    For every row of ``csv_reader`` the text of column 1 before its first
    underscore is parsed with pymorphy2 (first parse variant).  If the
    resulting tag contains NOUN or NPRO together with one of the cases
    accs, ablt or nomn, the integer value of column 3 is added to that
    word's total.

    Args:
        csv_reader: Iterable of rows (indexable sequences of strings) with
            the word in column 1 and its count in column 3.

    Returns:
        dict mapping each matching word (as written, not lemmatized) to
        the sum of its counts over all matching rows.

    Raises:
        IndexError: If a row has fewer than four columns.
        ValueError: If the count of a matching row is not an integer.

    Progress is reported through ``ngram_tools.updating_output_row``.
    """
    # Grammeme combinations that qualify a word; each set is tested with
    # pymorphy2's ``set in tag`` membership check.
    wanted_grammemes = tuple(
        {pos, case}
        for pos in ('NOUN', 'NPRO')
        for case in ('accs', 'ablt', 'nomn')
    )

    morph = pymorphy2.MorphAnalyzer()
    counter = 0
    counter_of_objects = 0
    dict_of_objects = {}

    for line in csv_reader:
        counter += 1
        count_of_words = line[3]
        second_word_without_tag = line[1].split('_')[0]
        tag = morph.parse(second_word_without_tag)[0].tag

        if any(grammemes in tag for grammemes in wanted_grammemes):
            # Accumulate the real count on the first occurrence as well;
            # previously a new word was seeded with 1, which discarded the
            # count of its first row.
            dict_of_objects[second_word_without_tag] = (
                dict_of_objects.get(second_word_without_tag, 0)
                + int(count_of_words)
            )
            # NOTE(review): counted once per matching row; the original
            # nesting of this increment was lost in formatting -- confirm
            # whether it was meant to count distinct words instead.
            counter_of_objects += 1

        ngram_tools.updating_output_row(
            'Dictionary creation process: ' + str(counter)
            + ' lines parsed and ' + str(counter_of_objects)
            + ' objects were specified'
        )

    return dict_of_objects