def find_collocations(file_name, data, popular_word):
    """Count two-word collocations in the text file ``file_name``.

    The file is read once. Every pair of consecutive words (after
    ``trim_word``) is a candidate; a pair is handed to
    ``count_collocations`` only when both words are non-empty, both start
    with a lowercase letter, and neither is in the collection returned by
    ``find_most_common_words``. Accepted words are lower-cased and stemmed
    before counting.

    Args:
        file_name: Path of the text file to analyse.
        data: Collocation container passed to ``count_collocations``
            (presumably updated in place -- confirm against that helper).
        popular_word: Forwarded unchanged to ``find_most_common_words``.

    Returns:
        A tuple ``(collocations, most_common_words, file_content)`` where
        ``collocations`` is the result of
        ``find_whole_collocations_from_stems`` and ``file_content`` is the
        full text of the file.
    """
    # Read everything up front inside a context manager so the handle is
    # closed even if a later step raises.
    with open(file_name, 'r') as text_file:
        file_content = text_file.read()

    most_common_words = find_most_common_words(file_content, popular_word)
    collocations = data

    # str.split() with no separator splits on any whitespace run (newlines
    # included), so this yields the same word sequence as splitting the
    # file line by line.
    words = [trim_word(word) for word in file_content.split()]

    # Pairing each word with its successor covers the final pair as well,
    # so it goes through the same filter and stemming as every other pair
    # (and nothing is counted when the file has fewer than two words).
    for first_word, second_word in zip(words, words[1:]):
        if not (first_word and second_word):
            continue
        if not (first_word[0].islower() and second_word[0].islower()):
            continue
        if first_word in most_common_words or second_word in most_common_words:
            continue
        count_collocations(collocations,
                           stem(first_word.lower()),
                           stem(second_word.lower()))

    collocations = find_whole_collocations_from_stems(collocations, file_content)
    return collocations, most_common_words, file_content
def find_collocations(text, data, popular_word):
    """Count two-word collocations in ``text``.

    Every pair of consecutive whitespace-separated words (after
    ``trim_word``) is a candidate; a pair is handed to
    ``count_collocations`` only when both words are non-empty, both start
    with a lowercase letter, and neither is in the collection returned by
    ``find_most_common_words``. Accepted words are lower-cased and stemmed
    before counting.

    Args:
        text: The full text to analyse.
        data: Collocation container passed to ``count_collocations``
            (presumably updated in place -- confirm against that helper).
        popular_word: Forwarded unchanged to ``find_most_common_words``.

    Returns:
        A tuple ``(collocations, most_common_words)`` where
        ``collocations`` is the result of
        ``find_whole_collocations_from_stems``.
    """
    most_common_words = find_most_common_words(text, popular_word)
    collocations = data

    words = [trim_word(word) for word in text.split()]

    # Pairing each word with its successor covers the final pair as well,
    # so it goes through the same filter and stemming as every other pair
    # (and nothing is counted when the text has fewer than two words).
    for first_word, second_word in zip(words, words[1:]):
        if not (first_word and second_word):
            continue
        if not (first_word[0].islower() and second_word[0].islower()):
            continue
        if first_word in most_common_words or second_word in most_common_words:
            continue
        count_collocations(collocations,
                           stem(first_word.lower()),
                           stem(second_word.lower()))

    collocations = find_whole_collocations_from_stems(collocations, text)
    return collocations, most_common_words