Esempio n. 1
0
def evaluate_algorithm(similarity_option, chunk):
    """Score ``max_similarity`` disambiguation against one annotated chunk.

    Every element of ``chunk`` that is an ``nltk.tree.Tree`` labelled with a
    WordNet ``Lemma`` is disambiguated with ``max_similarity``, using a window
    of surrounding lemmas as context.  The predicted synset is then compared
    with the synset of the gold label.

    Args:
        similarity_option: Forwarded to ``max_similarity`` as ``option``.
        chunk: Indexable sequence of annotated tokens (presumably one SemCor
            tagged sentence -- confirm against the caller).

    Returns:
        A ``(match, total, accuracy)`` tuple.  ``total`` counts the words for
        which a synset was predicted, ``match`` counts the predictions equal
        to the gold synset, and ``accuracy`` is ``match / total`` (``0.0``
        when no word could be scored, instead of raising
        ``ZeroDivisionError``).
    """
    match = 0
    total = 0
    chunk_text = tree_to_list(chunk)
    surface_words, lemmas, morphy_poss = lemmatize_sentence(chunk_text, keepWordPOS=True)
    # The loop below indexes ``lemmas`` with positions taken from ``chunk``.
    assert len(lemmas) == len(chunk), "lemmatizer output is not aligned with chunk"

    for i, semcor_word in enumerate(chunk):
        # Skip stop-words and punctuation, since neither is in WordNet
        if not isinstance(semcor_word, nltk.tree.Tree):
            continue
        gold_label = semcor_word.label()
        if not isinstance(gold_label, nltk.corpus.reader.wordnet.Lemma):
            # TODO: semcor_word.label() == 'such.s.00'
            continue
        # Skip named entities
        if gold_label == nltk.corpus.wordnet.lemma('group.n.01.group') and "') (NE " in semcor_word.pformat():
            continue

        # Context window: up to 15 lemmas before the word and 8 after it
        # (the slice end ``i + 9`` is exclusive and includes the word itself).
        context = list(lemmas[max(0, i - 15):i + 9])
        synset = max_similarity(context, lemmas[i], pos=morphy_poss[i], option=similarity_option)

        if synset is None:
            # TODO: possibly this is bug, for example, "over-all" should be converted to "overall" before looking in WordNet database
            continue
        if gold_label.synset() == synset:
            match += 1
        total += 1

    # A chunk with no scorable words would otherwise divide by zero.
    accuracy = match / total if total else 0.0
    return match, total, accuracy
Esempio n. 2
0
def lesk_word_sense(text, word, pos_tagged):
    """Return the definition of ``word`` as it is used in ``text``.

    ``pos_tagged[1]`` is compared with ``'VERB'`` to choose the part of speech
    handed to ``max_similarity``: ``'v'`` on a match, ``'n'`` otherwise.  When
    ``max_similarity`` yields a falsy result, ``word`` itself is returned
    unchanged; otherwise the ``definition()`` of the selected synset is
    returned.
    """
    wordnet_pos = 'v' if pos_tagged[1] == 'VERB' else 'n'
    best_synset = max_similarity(text, word, pos=wordnet_pos)
    if not best_synset:
        return word
    return best_synset.definition()
Esempio n. 3
0
def get_sensekey(sentence, word, lemma, pos):
    """Return the sense key of ``lemma`` in the best-matching synset of ``word``.

    ``max_similarity`` is called with ``option="resnik"`` and ``best=False``;
    its result is iterated as ``(score, synset)`` pairs in the order given.
    The key of the first synset lemma whose name equals ``lemma``
    (case-insensitively) is returned.

    Args:
        sentence: Context passed to ``max_similarity``.
        word: The ambiguous word passed to ``max_similarity``.
        lemma: Lemma name to look for among each synset's lemmas.
        pos: One of ``'VERB'``, ``'NOUN'``, ``'ADJ'`` or ``'ADV'``.

    Returns:
        The matching lemma's key, or ``None`` when nothing matches, ``pos`` is
        not one of the supported tags, or the lookup fails.
    """
    wordnet_pos = {'VERB': wn.VERB, 'NOUN': wn.NOUN, 'ADJ': wn.ADJ, 'ADV': wn.ADV}
    try:
        ranked = max_similarity(sentence, word, option="resnik", pos=wordnet_pos[pos], best=False)
        for _, synset in ranked:
            for candidate in synset.lemmas():
                if candidate.name().lower() == lemma.lower():
                    return candidate.key()
    except Exception:
        # Best-effort lookup: any failure (including an unsupported ``pos``)
        # means "no sense key".  ``Exception`` rather than a bare ``except``
        # so KeyboardInterrupt / SystemExit still propagate.
        return None

    return None
	def get_wordsense(self,sent,word):
		"""Pick a noun synset for ``word`` as it is used in ``sent``.

		``word`` is lower-cased and its spaces are replaced with underscores
		before the WordNet lookup.  Two disambiguators are run
		(``max_similarity`` with the ``'wup'`` option, and ``adapted_lesk``);
		of their two answers, the one that appears earliest in
		``wn.synsets(word, 'n')`` is returned.

		Returns:
			The chosen synset, or ``None`` when WordNet has no noun synsets for
			``word``.  If a disambiguator returns ``None`` or a synset that is
			not in the noun list, that answer is ignored; if both are unusable
			the first noun synset is returned instead of raising.
		"""
		word = word.lower()
		if word.split():
			word = word.replace(" ", "_")

		synsets = wn.synsets(word, 'n')
		if not synsets:
			return None

		wup = max_similarity(sent, word, 'wup', pos='n')
		adapted_lesk_output = adapted_lesk(sent, word, pos='n')

		# list.index() raises ValueError for a value that is not in the list,
		# so each candidate is looked up defensively.
		indices = []
		for candidate in (wup, adapted_lesk_output):
			if candidate is None:
				continue
			try:
				indices.append(synsets.index(candidate))
			except ValueError:
				continue

		if not indices:
			return synsets[0]
		return synsets[min(indices)]
Esempio n. 5
0
def test_lesk():
    """Print the senses chosen by ``lesk`` and ``max_similarity`` for a sample phrase.

    The phrase is tokenized and POS-tagged with the universal tagset.  The tag
    of the second token decides whether the verb (``'v'``) or noun (``'n'``)
    part of speech is used for both lookups.  Finally every synset of
    ``'brake'`` is printed together with its definition.
    """
    # Alternative sample input:
    # text = "Pulsation- Fluctuation of the brake pedal when the brakes are applied".lower()
    text = "push brake pedal on the vehicle"
    tokens = lesk_vec_lib.tokenize(text)
    pos_tagged = nltk.pos_tag(tokens, tagset='universal')
    print(pos_tagged)

    second_token = pos_tagged[1]
    wordnet_pos = 'v' if second_token[1] == 'VERB' else 'n'

    lesk_sense = lesk(tokens, second_token[0], pos=wordnet_pos)
    print(lesk_sense.definition())
    similarity_sense = max_similarity(text, 'brake', pos=wordnet_pos)
    print(similarity_sense.definition())

    for candidate in wn.synsets('brake'):
        print(candidate, candidate.definition())
Esempio n. 6
0
def word_sense(sentence, keyword):
    """Pick a noun synset for ``keyword`` as it is used in ``sentence``.

    ``keyword`` is lower-cased and its spaces are replaced with underscores
    before the WordNet lookup.  Two disambiguators are run (``max_similarity``
    with the ``'wup'`` option, and ``adapted_lesk``); of their two answers,
    the one that appears earliest in the WordNet noun synset list is returned.

    Returns:
        The chosen synset; the first noun synset if disambiguation fails for
        any reason; or ``None`` when WordNet has no noun synsets for the word.
    """
    print("5.Getting word sense to obtain best MCQ options with WordNet...")
    word = keyword.lower()
    if word.split():
        word = word.replace(" ", "_")

    syon_sets = wordnet.synsets(word, 'n')
    if not syon_sets:
        return None

    try:
        wup = max_similarity(sentence, word, 'wup', pos='n')
        adapted_lesk_output = adapted_lesk(sentence, word, pos='n')
        lowest_index = min(syon_sets.index(wup), syon_sets.index(adapted_lesk_output))
        return syon_sets[lowest_index]
    except Exception:
        # Best-effort: fall back to the first listed sense.  ``Exception``
        # rather than a bare ``except`` so KeyboardInterrupt / SystemExit
        # are not swallowed.
        return syon_sets[0]
Esempio n. 7
0
def get_wordsense(sent, word):
    """Pick the noun synset of ``word`` that best fits the context ``sent``.

    Uses (1) ``max_similarity`` with the ``"wup"`` option and (2) the adapted
    Lesk algorithm; this is a word sense disambiguation step (one word can
    mean different things depending on context).

    Paper: https://thesai.org/Downloads/Volume11No3/Paper_30-Adapted_Lesk_Algorithm.pdf

    The goal is to see whether the word has synonyms (or words close in
    meaning) that could potentially be used as answer choices.

    Args:
        sent: Context sentence handed to both disambiguators.
        word: The word to disambiguate; it is lower-cased and its spaces are
            replaced with underscores before the WordNet lookup.

    Returns:
        Of the two disambiguators' answers, the synset that appears earliest
        in ``wn.synsets(word, 'n')``.  An answer that is ``None`` or missing
        from that list is ignored; if both are unusable the first noun synset
        is returned instead of raising.  ``None`` is returned (after printing
        a message) when WordNet has no noun synsets for the word.
    """
    word = word.lower()

    if word.split():
        word = word.replace(" ", "_")

    # All noun synsets WordNet knows for the word.
    synsets = wn.synsets(word, 'n')

    if not synsets:
        print(f"No synonyms found for the word {word}")
        return None

    # Similarity-based choice: scores the candidate senses of the word
    # against the words of the context sentence.
    wup = max_similarity(sent, word, "wup", pos='n')

    # Lesk-based choice: assumes words in the same "neighborhood" of text
    # tend to share the same topic.
    adapted_lesk_output = adapted_lesk(sent, word, pos="n")

    # list.index() raises ValueError for a value that is not in the list,
    # so each candidate is looked up defensively.
    indices = []
    for candidate in (wup, adapted_lesk_output):
        if candidate is None:
            continue
        try:
            indices.append(synsets.index(candidate))
        except ValueError:
            continue

    if not indices:
        return synsets[0]
    return synsets[min(indices)]
Esempio n. 8
0
# print "#TESTING first_sense() ..."
# print "Context:", bank_sents[0]
# answer = first_sense('bank')
# print "Sense:", answer
# try: definition = answer.definition()
# except: definition = answer.definition
# print "Definition:", definition
# print
#
# print "#TESTING most_frequent_sense() ..."
# print "Context:", bank_sents[0]
# answer = most_frequent_sense('bank')
# print "Sense:", answer
# try: definition = answer.definition()
# except: definition = answer.definition
# print "Definition:", definition
# print

# Run max_similarity on the first "bank" sentence once per similarity measure
# and print the chosen sense together with its definition.
print("======== TESTING similarity ===========\n")
from pywsd.similarity import max_similarity

for sim_choice in ["path", "lch", "res", "jcn", "lin"]:
    print("Context:", bank_sents[0])
    print("Similarity:", sim_choice)
    answer = max_similarity(bank_sents[0], 'bank', sim_choice, pos="n")
    print("Sense:", answer)
    # ``definition`` is called when it is a method and used as-is otherwise
    # (presumably to support NLTK versions that expose it as a plain
    # attribute).  An explicit callable() check replaces the former bare
    # ``except`` so real errors are no longer hidden.
    definition = answer.definition
    if callable(definition):
        definition = definition()
    print("Definition:", definition)
    print()
Esempio n. 9
0
# Exercise first_sense(), most_frequent_sense() and max_similarity() on the
# first "bank" sentence, printing each chosen sense and its definition.
# Ported from Python 2 print statements to the print() function.
#
# NOTE: ``definition`` is called when it is a method and used as-is otherwise
# (presumably to support NLTK versions that expose it as a plain attribute).
# An explicit callable() check replaces the former bare ``except``.
print("#TESTING first_sense() ...")
print("Context:", bank_sents[0])
answer = first_sense('bank')
print("Sense:", answer)
definition = answer.definition
if callable(definition):
    definition = definition()
print("Definition:", definition)
print()

print("#TESTING most_frequent_sense() ...")
print("Context:", bank_sents[0])
answer = most_frequent_sense('bank')
print("Sense:", answer)
definition = answer.definition
if callable(definition):
    definition = definition()
print("Definition:", definition)
print()

print("======== TESTING similarity ===========\n")
from pywsd.similarity import max_similarity

for sim_choice in ["path", "lch", "wup", "res", "jcn", "lin"]:
    print("Context:", bank_sents[0])
    print("Similarity:", sim_choice)
    answer = max_similarity(bank_sents[0], 'bank', sim_choice, pos="n")
    print("Sense:", answer)
    definition = answer.definition
    if callable(definition):
        definition = definition()
    print("Definition:", definition)
    print()