def attribute_freq(meta_list, attribute="keyword"):
    """Print the 10 most frequent keywords or authors across *meta_list*.

    Parameters
    ----------
    meta_list : iterable
        Paper metadata objects; each must expose ``.keyword`` and
        ``.author`` iterables (presumably lists of strings -- confirm
        against the caller).
    attribute : str
        Either ``"keyword"`` or ``"author"``; selects which field to count.

    Raises
    ------
    AttributeError
        If *attribute* is neither ``"keyword"`` nor ``"author"``.
    """
    if attribute == "keyword":
        attributes = [kw for paper in meta_list for kw in paper.keyword]
    elif attribute == "author":
        attributes = [author for paper in meta_list for author in paper.author]
    else:
        raise AttributeError("Missing attribute")

    freq_dist = FreqDist(attributes)
    # Drop the empty-string entry; FreqDist is a Counter subclass, whose
    # __delitem__ does not raise when the key is absent.
    del freq_dist['']

    # most_common(10) already sorts by descending count; rank from 1 so
    # the report reads "1. ..." through "10. ..." instead of starting at 0.
    for rank, (att, count) in enumerate(freq_dist.most_common(10), start=1):
        print("{}. {} ({})".format(rank, att, count))
    print()
# ---------------------------------------------------------------------------
# Exploratory word-frequency exercises over the fdist2 frequency
# distribution.  NOTE(review): relies on fdist2, list3, nltk, Text and
# wordnet being defined/imported earlier in the file -- confirm.
# ---------------------------------------------------------------------------

# 11 -- print out the 30 most used words that consist only of letters
most_common = {word: fdist2[word] for word in fdist2 if word.isalpha()}
most_common = sorted(most_common.items(), key=lambda pair: pair[1], reverse=True)
print(most_common[:30])

# 6 -- merge counts of words that differ only in letter case.
# After the double loop, one case-variant of each word holds the combined
# count and the other variants hold 0.
for word in fdist2:
    for word2 in fdist2:
        if word != word2 and word.lower() == word2.lower():
            fdist2[word] = fdist2[word] + fdist2[word2]
            fdist2[word2] = 0
# Remove the zeroed-out duplicates.  Iterate over a snapshot of the KEYS:
# the original iterated .items() (so the lookup key was a (word, count)
# tuple, a guaranteed KeyError) and deleted entries from the distribution
# while iterating it.
for word in list(fdist2):
    if fdist2[word] == 0:
        del fdist2[word]

# 7 -- merge counts of words distinct only in affixes by stemming.
# A plain dict comprehension would silently OVERWRITE the count each time
# two words share a stem; accumulate the counts instead.
porter = nltk.PorterStemmer()
no_affix = {}
for word in fdist2:
    stem = porter.stem(word)
    no_affix[stem] = no_affix.get(stem, 0) + fdist2[word]
print(no_affix)

# 8 -- convert into an nltk.Text object.
text = Text(list3)
# concordance() prints its matches itself and returns None, so call it
# directly instead of printing its (None) return value.
text.concordance('point')  # 23 matches

# 9 -- WordNet synsets
# part a
for ss in wordnet.synsets('point'):
    print(ss, ss.definition())