def remove_most_frequent_words(paper_file, num_words=20):
    """Return the word counts of *paper_file* without its most frequent words.

    The counts are obtained from ``document.get_counts3(paper_file)``, which
    is used here as a mapping from word to count (presumably a dict or
    Counter -- confirm against ``document``).

    Args:
        paper_file: Passed through unchanged to ``document.get_counts3``.
        num_words: How many of the highest-count words to drop.

    Returns:
        A new dict holding every entry of the counts mapping except the
        ``num_words`` entries with the highest counts. Remaining entries keep
        the iteration order of the counts mapping.
    """
    word_counts = document.get_counts3(paper_file)

    # sorted() is stable, so words with equal counts are ranked in the
    # mapping's own iteration order when deciding which ones make the cut.
    ranked = sorted(word_counts.keys(), key=word_counts.get, reverse=True)

    # A set makes each membership test below O(1) instead of a list scan.
    top_words = set(ranked[:num_words])

    return {
        word: word_counts[word]
        for word in word_counts
        if word not in top_words
    }
def most_frequent_words_dict(paper_file, num_words=20):
    """Return the word counts of *paper_file* for its most frequent words only.

    The counts are obtained from ``document.get_counts3(paper_file)``, which
    is used here as a mapping from word to count (presumably a dict or
    Counter -- confirm against ``document``).

    Args:
        paper_file: Passed through unchanged to ``document.get_counts3``.
        num_words: How many of the highest-count words to keep.

    Returns:
        A new dict holding only the ``num_words`` entries of the counts
        mapping with the highest counts. Entries keep the iteration order of
        the counts mapping (they are not sorted by count).
    """
    word_counts = document.get_counts3(paper_file)

    # sorted() is stable, so words with equal counts are ranked in the
    # mapping's own iteration order when deciding which ones make the cut.
    ranked = sorted(word_counts.keys(), key=word_counts.get, reverse=True)

    # A set makes each membership test below O(1) instead of a list scan.
    top_words = set(ranked[:num_words])

    return {
        word: word_counts[word]
        for word in word_counts
        if word in top_words
    }