import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

def lemmatize(keywords, add_synsets=False):
    new_keywords = []
    for word in keywords:
        new_keywords.append(lemmatizer.lemmatize(word))
        if add_synsets:
            # Optionally expand each keyword with the lemmas of all its synsets.
            synsets = wn.synsets(word)
            for syn in synsets:
                for lemma in syn.lemmas():  # lemmas() / name() are methods in NLTK 3.x
                    new_keywords += lemma.name().lower().split("_")
    new_keywords = remove_duplicates_list(new_keywords)
    return new_keywords
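# remove_duplicates_list is called above but not defined in this section; a
# minimal sketch, assuming it deduplicates while preserving first-seen order:
def remove_duplicates_list(items):
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result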
def get_wordnet_def_entity_keywords(entity):
    synsets = wn.synsets(entity)
    if len(synsets) > 0:
        keywords = []
        for synset in synsets:
            # Collect all lemma names of the synset as keywords.
            for lemma in synset.lemmas():  # lemmas() / name() are methods in NLTK 3.x
                keywords.extend(lemma.name().lower().split("_"))
            # Also collect the nouns appearing in the synset's gloss.
            definition = synset.definition().lower()  # definition() is a method in NLTK 3.x
            tokenized_def = nltk.word_tokenize(definition)
            def_pos = nltk.pos_tag(tokenized_def)
            for (word, pos) in def_pos:
                if pos in ['NN', 'NNP', 'NNPS', 'NNS'] and word != '(':  # nltk sometimes tags ( as NN
                    keywords.append(word)
        return remove_duplicates_list(keywords)
    else:
        return None
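# Quick usage sketch for both functions, assuming the NLTK data packages they
# rely on (wordnet, punkt for word_tokenize, averaged_perceptron_tagger for
# pos_tag) have been fetched; the sample inputs are illustrative only:
if __name__ == "__main__":
    nltk.download("wordnet", quiet=True)
    nltk.download("punkt", quiet=True)
    nltk.download("averaged_perceptron_tagger", quiet=True)
    print(lemmatize(["dogs", "running"], add_synsets=True))
    print(get_wordnet_def_entity_keywords("dog"))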