def in_wordnet(word):
    """Tell whether *word* is known to any of the WordNet dictionaries.

    The word is first reduced with ``wordnet.morphy``; when that yields
    ``None`` the word itself is used as the base form.  Every value of
    ``wordnet.Dictionaries`` is then probed for the base form and for the
    word as given.

    Returns:
        True as soon as one dictionary contains either form, otherwise False.
    """
    reduced = wordnet.morphy(word)
    lemma = word if reduced is None else reduced
    return any(
        lemma in dictionary or word in dictionary
        for dictionary in wordnet.Dictionaries.values()
    )
def select(cls, words, n=3):
    """Return the names of the ``n`` topics that best match ``words``.

    Each word is reduced to a noun base form with ``wn.morphy``; words for
    which that returns ``None`` are ignored.  The first noun sense of every
    remaining keyword is compared with each topic synset using
    ``path_similarity``, and the similarities are summed per topic across
    all keywords.

    Args:
        cls: Not used by the body.
        words: Iterable of word strings to classify.
        n: Number of topic names to return (default 3).  Values larger
            than the number of topics return every topic; values below
            zero are treated as zero.

    Returns:
        A list of distinct topic names ordered from highest to lowest
        accumulated similarity.  Ties keep the order of the topic table.
    """
    # (display name, WordNet synset id).  To add a topic, append a pair
    # here, e.g. ('animal', 'animal.n.01').
    topics = (
        ('politics', 'politics.n.01'),
        ('sports', 'sport.n.01'),
        ('food', 'food.n.01'),
        ('party', 'party.n.01'),
        ('education', 'education.n.01'),
        ('book', 'book.n.01'),
        ('tv', 'tv.n.01'),
        ('holiday', 'holiday.n.01'),
        ('computer', 'computer.n.01'),
        ('science', 'science.n.01'),
    )

    keywords = []
    for word in words:
        lemma = wn.morphy(word, wn.NOUN)
        if lemma is not None:
            keywords.append(lemma)

    names = [name for name, _ in topics]
    labels = [wn.synset(synset_id) for _, synset_id in topics]

    interest = [0.0] * len(labels)
    for keyword in keywords:
        sense = wn.synset(keyword + '.n.01')
        for index, label in enumerate(labels):
            similarity = sense.path_similarity(label)
            # path_similarity may report "no path" as None; skip those
            # instead of failing on the addition.
            if similarity is not None:
                # Accumulate over all keywords.  The previous "= +x"
                # overwrote the score, so only the last keyword counted.
                interest[index] += similarity

    # A stable descending sort yields distinct topics even when scores tie
    # (for instance when no keyword survived and every score is zero).
    ranked = sorted(
        range(len(interest)),
        key=lambda index: interest[index],
        reverse=True,
    )
    return [names[index] for index in ranked[:max(n, 0)]]
def runWordnet():
    """Print how similar a verb is to a fixed list of reference verbs.

    The verb is read from ``leaf[0]``, reduced to its base form with
    ``wordnet.morphy(..., wordnet.VERB)`` and looked up in ``wordnet.V``.
    Every sense of the verb is compared, via ``path_similarity``, with the
    first sense of each reference verb.  For each sense the union of the
    reference verbs' first hypernym paths is printed, and at the end the
    highest similarity seen is printed.

    The final value printed is ``-1`` when no similarity was recorded and
    ``-2`` when any step of the lookup raised an exception.

    NOTE(review): ``leaf`` is not bound inside this function; the loop that
    used to provide it (``for leaf in c.leaves()``) was commented out.  It
    must therefore come from the enclosing module scope -- confirm.  If it
    is undefined, the resulting NameError is caught and ``-2`` is printed.
    """
    # Loop-invariant, so built once instead of once per sense.
    ref_verbs = [
        "submit", "obtain", "enter", "access", "archive", "retrieve",
        "present", "post", "query", "import", "download", "view", "find",
        "deposit",
    ]
    maxscore = -1
    try:
        verb = wordnet.morphy(leaf[0], wordnet.VERB)
        senses = wordnet.V[verb]
        for sense in senses:
            s = set()
            for ref in ref_verbs:
                ref_sense = wordnet.V[ref][0]
                newscore = ref_sense.path_similarity(sense)
                s.update(ref_sense.hypernym_paths()[0])
                # A missing similarity (None) must not take part in max().
                if newscore is not None:
                    maxscore = max(maxscore, newscore)
            # Single-argument print works as a statement and as a function,
            # and reproduces the old 'print s,"\n\n"' output exactly.
            print("%s \n\n" % (s,))
    except Exception:
        # Deliberate best-effort: any lookup failure is reported as -2.
        # Narrowed from a bare except so Ctrl-C and SystemExit propagate.
        maxscore = -2
    print(maxscore)
def stem(self, word):
    """Return whatever ``morphy`` yields for *word*.

    Thin wrapper: the instance is not consulted and the result of
    ``morphy(word)`` is handed back unchanged.
    """
    base_form = morphy(word)
    return base_form