Example #1
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

def lemmatize(keywords, add_synsets=False):
  # Lemmatize each keyword; optionally expand with WordNet synset lemmas.
  new_keywords = []
  for word in keywords:
    new_keywords.append(lemmatizer.lemmatize(word))
    if add_synsets:
      # Add every lemma of every synset of the word, splitting
      # multi-word lemmas such as "ice_cream" into their parts.
      for syn in wn.synsets(word):
        for lemma in syn.lemmas():
          new_keywords += lemma.name().lower().split("_")
  # remove_duplicates_list is a helper defined elsewhere in this module.
  new_keywords = remove_duplicates_list(new_keywords)
  return new_keywords
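
A minimal usage sketch (not from the source): it assumes the WordNet corpus has been downloaded via nltk.download('wordnet') and that remove_duplicates_list is an order-preserving deduplication helper.

# Usage sketch; assumes nltk.download('wordnet') has been run and that
# remove_duplicates_list is available (e.g. list(dict.fromkeys(items))).
print(lemmatize(["cars", "geese"]))          # -> ['car', 'goose']
print(lemmatize(["car"], add_synsets=True))  # also pulls in synset lemmas
                                             # such as 'auto', 'automobile'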
Example #2
import nltk
from nltk.corpus import wordnet as wn

def get_wordnet_def_entity_keywords(entity):
    # Collect keywords for an entity: the lemmas of its WordNet synsets
    # plus the nouns occurring in each synset's definition.
    synsets = wn.synsets(entity)
    if len(synsets) > 0:
        keywords = []
        for synset in synsets:
            for lemma in synset.lemmas():
                keywords.extend(lemma.name().lower().split("_"))
            definition = synset.definition().lower()
            tokenized_def = nltk.word_tokenize(definition)
            def_pos = nltk.pos_tag(tokenized_def)
            for (word, pos) in def_pos:
                # Keep nouns only; nltk sometimes tags '(' as NN, so skip it.
                if pos in ['NN', 'NNP', 'NNPS', 'NNS'] and word != '(':
                    keywords.append(word)
        # remove_duplicates_list is a helper defined elsewhere in this module.
        return remove_duplicates_list(keywords)
    else:
        return None
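
A similar sketch for this example (not from the source): beyond the WordNet corpus, it assumes the punkt tokenizer and averaged_perceptron_tagger models are installed, since word_tokenize and pos_tag depend on them.

# Usage sketch; assumes nltk.download('wordnet'), nltk.download('punkt')
# and nltk.download('averaged_perceptron_tagger') have been run.
print(get_wordnet_def_entity_keywords("dog"))
# -> lemma parts such as 'dog', 'domestic', 'canis', plus nouns drawn
#    from each synset definition, deduplicated
print(get_wordnet_def_entity_keywords("qwertyuiop"))  # -> None (no synsets)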