def evaluate_algorithm(similarity_option, chunk):
    """Score one pywsd similarity metric against SemCor gold senses.

    Args:
        similarity_option: metric name forwarded to ``max_similarity``
            (e.g. "path", "wup", "resnik").
        chunk: a SemCor chunk — a sequence whose sense-tagged entries are
            ``nltk.tree.Tree`` nodes labelled with WordNet ``Lemma`` objects.

    Returns:
        Tuple ``(match, total, accuracy)``: ``total`` is the number of words
        for which a synset was predicted, ``match`` the number predicted
        correctly, and ``accuracy`` their ratio (0.0 when ``total`` is 0).
    """
    match = 0
    total = 0
    chunk_text = tree_to_list(chunk)
    surface_words, lemmas, morphy_poss = lemmatize_sentence(chunk_text, keepWordPOS=True)
    # Lemmatization must stay token-aligned with the SemCor chunk.
    assert(len(lemmas) == len(chunk))
    for i in range(len(chunk)):
        semcor_word = chunk[i]
        # Skip stop-words and punctuation since neither is in WordNet.
        if not isinstance(semcor_word, nltk.tree.Tree):
            continue
        # Only Lemma-labelled nodes carry a gold sense we can score against.
        if not isinstance(semcor_word.label(), nltk.corpus.reader.wordnet.Lemma):
            # TODO: semcor_word.label() == 'such.s.00'
            continue
        # Skip named entities.
        if semcor_word.label() == nltk.corpus.wordnet.lemma('group.n.01.group') and "') (NE " in semcor_word.pformat():
            continue
        # Context window: up to 15 lemmas before and 8 after the target.
        context = lemmas[max(0, i - 15):i + 9]
        lemma = lemmas[i]
        pos = morphy_poss[i]
        synset = max_similarity(context, lemma, pos=pos, option=similarity_option)
        if synset is None:
            # TODO: possibly this is a bug, for example, "over-all" should be
            # converted to "overall" before looking in the WordNet database.
            continue
        if semcor_word.label().synset() == synset:
            match += 1
        total += 1
    # Guard the division: an empty/unscorable chunk previously raised
    # ZeroDivisionError here.
    accuracy = match / total if total else 0.0
    return match, total, accuracy
def lesk_word_sense(text, word, pos_tagged):
    """Return the contextual definition of ``word``.

    The word is treated as a verb when the supplied tag is 'VERB' and as a
    noun otherwise; ``max_similarity`` then picks the best-matching synset
    from the surrounding ``text``. When no synset is found, the original
    ``word`` string is returned unchanged.
    """
    wn_pos = 'v' if pos_tagged[1] == 'VERB' else 'n'
    best_synset = max_similarity(text, word, pos=wn_pos)
    return best_synset.definition() if best_synset else word
def get_sensekey(sentence, word, lemma, pos):
    """Return the WordNet sense key for ``lemma`` given ``word`` in context.

    Args:
        sentence: context sentence used for disambiguation.
        word: surface form to disambiguate.
        lemma: lemma whose sense key is wanted (case-insensitive match).
        pos: universal POS tag; must be one of VERB/NOUN/ADJ/ADV.

    Returns:
        The matching ``Lemma.key()`` string, or ``None`` when the POS is
        unsupported, disambiguation fails, or no candidate lemma matches.
    """
    wordnet_pos = {'VERB': wn.VERB, 'NOUN': wn.NOUN, 'ADJ': wn.ADJ, 'ADV': wn.ADV}
    try:
        # best=False returns all (score, synset) candidates, best first.
        synsets = max_similarity(sentence, word, option="resnik", pos=wordnet_pos[pos], best=False)
        for _, synset in synsets:
            for lemma_ in synset.lemmas():
                if lemma_.name().lower() == lemma.lower():
                    return lemma_.key()
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; KeyError here covers an unsupported POS.
        return None
    return None
def get_wordsense(self, sent, word):
    """Disambiguate ``word`` in ``sent`` and return its noun synset.

    Combines Wu-Palmer ``max_similarity`` and ``adapted_lesk``; of the two
    predicted senses, the one with the lower WordNet index (i.e. the more
    frequent sense) wins. Returns ``None`` when WordNet has no noun synsets
    for the word.
    """
    word = word.lower()
    # Multi-word terms are stored with underscores in WordNet.
    # (Was ``> 0``, which is true for any non-empty word; the replace was
    # a no-op for single words, so behavior is unchanged.)
    if len(word.split()) > 1:
        word = word.replace(" ", "_")
    synsets = wn.synsets(word, 'n')
    if not synsets:
        return None
    try:
        wup = max_similarity(sent, word, 'wup', pos='n')
        adapted_lesk_output = adapted_lesk(sent, word, pos='n')
        lowest_index = min(synsets.index(wup), synsets.index(adapted_lesk_output))
        return synsets[lowest_index]
    except ValueError:
        # A disambiguator returned a synset outside the noun candidates
        # (previously an unhandled crash); fall back to the most frequent
        # sense, matching the sibling ``word_sense`` helper.
        return synsets[0]
def test_lesk():
    """Smoke-test lesk() against max_similarity() on a sample sentence.

    Prints the POS tagging, the sense definitions chosen by both
    disambiguators, and every candidate WordNet sense of 'brake' for
    manual comparison.
    """
    text = "push brake pedal on the vehicle"
    tokens = lesk_vec_lib.tokenize(text)
    pos_tagged = nltk.pos_tag(tokens, tagset='universal')
    print(pos_tagged)
    # Disambiguate the second token as a verb if tagged VERB, else a noun.
    temp_pos = 'v' if pos_tagged[1][1] == 'VERB' else 'n'
    print(lesk(tokens, pos_tagged[1][0], pos=temp_pos).definition())
    print(max_similarity(text, 'brake', pos=temp_pos).definition())
    # List all candidate senses so the chosen one can be eyeballed.
    for ss in wn.synsets('brake'):
        print(ss, ss.definition())
def word_sense(sentence, keyword):
    """Disambiguate ``keyword`` in ``sentence``; return its noun synset.

    Runs Wu-Palmer ``max_similarity`` and ``adapted_lesk``, then keeps the
    candidate with the lower WordNet index (the more frequent sense). Falls
    back to the most frequent sense if either disambiguator fails, and
    returns ``None`` when WordNet has no noun synsets for the keyword.
    """
    print("5.Getting word sense to obtain best MCQ options with WordNet...")
    word = keyword.lower()
    # Multi-word terms are stored with underscores in WordNet.
    # (Was ``> 0``, always true for non-empty input; behavior unchanged.)
    if len(word.split()) > 1:
        word = word.replace(" ", "_")
    syon_sets = wordnet.synsets(word, 'n')
    if not syon_sets:
        return None
    try:
        wup = max_similarity(sentence, word, 'wup', pos='n')
        adapted_lesk_output = adapted_lesk(sentence, word, pos='n')
        lowest_index = min(syon_sets.index(wup), syon_sets.index(adapted_lesk_output))
        return syon_sets[lowest_index]
    except Exception:
        # Narrowed from a bare ``except:`` (which also caught
        # KeyboardInterrupt/SystemExit); keep the best-effort fallback.
        return syon_sets[0]
def get_wordsense(sent, word):
    """
    Get a sentence of the meaning of a word, in context, using (1) Lesk
    algorithm and (2) max similarity.

    Useful for word sense disambiguation tasks (e.g., one word means
    different things, based on context).

    Paper: https://thesai.org/Downloads/Volume11No3/Paper_30-Adapted_Lesk_Algorithm.pdf

    The goal here is to see if the word has synonyms (or words close in
    meaning) that we could potentially use as answer choices.
    """
    word = word.lower()
    # Multi-word terms are stored with underscores in WordNet.
    # (Was ``> 0``, which is true for any non-empty word; the replace is a
    # no-op for single words, so behavior is unchanged.)
    if len(word.split()) > 1:
        word = word.replace(" ", "_")
    # get set of synonyms
    synsets = wn.synsets(word, 'n')
    if not synsets:
        print(f"No synonyms found for the word {word}")
        return None
    try:
        # get similarity between possible synsets of all words in
        # context sentence and possible synsets of ambiguous words,
        # to determine "context" of the word of interest and what it
        # "should" mean
        wup = max_similarity(sent, word, "wup", pos='n')
        # use Lesk algorithm, which will assume that words in the same
        # "neighborhood", or area of text, will tend to share the same topic.
        adapted_lesk_output = adapted_lesk(sent, word, pos="n")
        lowest_index = min(synsets.index(wup), synsets.index(adapted_lesk_output))
        return synsets[lowest_index]
    except ValueError:
        # A disambiguator returned a synset outside the noun candidates
        # (previously an unhandled crash); fall back to the most frequent
        # sense.
        return synsets[0]
# Exercise max_similarity with each supported metric over the sample
# context sentence. (The old Python-2 first_sense()/most_frequent_sense()
# checks that were commented out here have been removed.)
print("======== TESTING similarity ===========\n")

from pywsd.similarity import max_similarity

for sim_choice in ["path", "lch", "res", "jcn", "lin"]:
    print("Context:", bank_sents[0])
    print("Similarity:", sim_choice)
    answer = max_similarity(bank_sents[0], 'bank', sim_choice, pos="n")
    print("Sense:", answer)
    # Older pywsd exposed .definition as a plain attribute, newer as a
    # method — narrowed from a bare except: to exactly those two failures.
    try:
        definition = answer.definition()
    except (AttributeError, TypeError):
        definition = answer.definition
    print("Definition:", definition)
    print()
# NOTE(review): Python 2 script — uses print statements; it will not run
# under Python 3. Kept byte-identical; only comments added.

# Sanity-check the baseline that always picks the first listed sense.
print "#TESTING first_sense() ..."
print "Context:", bank_sents[0]
answer = first_sense('bank')
print "Sense:", answer
# Older pywsd versions expose .definition as a plain attribute rather than
# a method, hence the attribute fallback.
try: definition = answer.definition()
except: definition = answer.definition
print "Definition:", definition
print

# Sanity-check the most-frequent-sense baseline.
print "#TESTING most_frequent_sense() ..."
print "Context:", bank_sents[0]
answer = most_frequent_sense('bank')
print "Sense:", answer
try: definition = answer.definition()
except: definition = answer.definition
print "Definition:", definition
print

# Exercise max_similarity with every supported similarity metric.
print "======== TESTING similarity ===========\n"

from pywsd.similarity import max_similarity

for sim_choice in ["path", "lch", "wup", "res", "jcn", "lin"]:
    print "Context:", bank_sents[0]
    print "Similarity:", sim_choice
    answer = max_similarity(bank_sents[0], 'bank', sim_choice, pos="n")
    print "Sense:", answer
    # Same attribute-vs-method fallback as above.
    try: definition = answer.definition()
    except: definition = answer.definition
    print "Definition:", definition
    print