Example #1
0
def attribute_freq(meta_list, attribute="keyword"):
    """Print the 10 most frequent values of ``attribute`` across papers.

    Args:
        meta_list: iterable of paper metadata objects exposing ``keyword``
            and ``author`` list attributes.
        attribute: which list attribute to tally ("keyword" or "author").

    Raises:
        AttributeError: if ``attribute`` is not one of the supported names.
    """
    # Validate up front so unsupported names fail with the same exception
    # type the original if/elif chain raised.
    if attribute not in ("keyword", "author"):
        raise AttributeError("Missing attribute")
    # getattr replaces the duplicated per-attribute comprehensions.
    attributes = [value for paper in meta_list for value in getattr(paper, attribute)]

    freq_dist = FreqDist(attributes)

    # Remove the empty key; FreqDist inherits Counter's __delitem__,
    # which is a silent no-op when the key is absent.
    del freq_dist['']

    # most_common(10) is the idiomatic FreqDist way to get the top 10
    # sorted by descending frequency.
    for i, (att, count) in enumerate(freq_dist.most_common(10)):
        print("{}. {} ({})".format(i, att, count))
    print()
Example #2
0
##11--print out 30 most used words that begin with a letter
# Keep only purely alphabetic tokens, then rank by descending frequency.
most_common = {word: fdist2[word] for word in fdist2 if word.isalpha()}
most_common = sorted(most_common.items(), key=lambda x: x[1], reverse=True)
# print() call form: the Python 2 print statement is a SyntaxError under
# Python 3 and inconsistent with print() used elsewhere in this file.
print(most_common[:30])

#6--eliminate words distinct only in letter case
# Merge the counts of case variants into one entry and zero the other.
# Only values change during iteration (no keys added/removed), so
# iterating fdist2 directly here is safe.
for word in fdist2:
    for word2 in fdist2:
        if word != word2 and word.lower() == word2.lower():
            fdist2[word] = fdist2[word] + fdist2[word2]
            fdist2[word2] = 0

# Drop the entries zeroed out above.
# BUG FIX: the original looped over fdist2.items(), binding ``word`` to a
# (key, count) tuple; ``fdist2[tuple]`` returned 0 for that missing key
# and the delete silently no-opped (Counter semantics), so zero-count
# entries were never actually removed. Snapshot the keys with list() so
# deletion during iteration is safe.
for word in list(fdist2):
    if fdist2[word] == 0:
        del fdist2[word]
 
#7--eliminate words distinct only in affixes
porter = nltk.PorterStemmer()
# Accumulate counts per stem. BUG FIX: the original dict comprehension
# kept only the count of the *last* word mapping to each stem, silently
# discarding the other affix variants' counts instead of merging them.
no_affix = {}
for word in fdist2:
    stem = porter.stem(word)
    no_affix[stem] = no_affix.get(stem, 0) + fdist2[word]
# print() call form for Python 3 compatibility.
print(no_affix)

#8--convert into nltk.Text object
text = Text(list3)
# concordance() prints its matches directly and returns None, so the
# original ``print text.concordance(...)`` (also Python 2 syntax) emitted
# a stray "None" after the 23 matches. Call it without print.
text.concordance('point')  #23 matches


#9--WordNet synsets
#part a
# List every WordNet sense of "point" together with its gloss.
for synset in wordnet.synsets('point'):
    definition = synset.definition()
    print(synset, definition)