def get_greatest_synsets_similarity_between(synsets_wn, nouns):
    """Return the synset in ``synsets_wn`` with the highest mean similarity to ``nouns``.

    For every candidate synset, each noun is scored as the average of
    ``WordnetHandler.cal_similarity(candidate, s)`` over all synsets ``s``
    returned by ``WordnetHandler.get_synsets_for_word(noun, 'n')``.  Nouns
    without any synset contribute 0.  The candidate's final score is the sum
    of the per-noun averages divided by ``len(nouns)``.

    Args:
        synsets_wn: iterable of candidate synsets.
        nouns: sequence of words the candidates are compared against.

    Returns:
        The candidate with the strictly greatest score above 0 (the first one
        wins on ties), or ``None`` when ``nouns`` is empty or no candidate
        scores above 0.
    """
    if len(nouns) == 0:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("no nouns")
        return None

    best_synset = None
    best_score = 0
    for synset_wn in synsets_wn:
        total = 0
        for noun in nouns:
            synsets_of_noun = WordnetHandler.get_synsets_for_word(noun, 'n')
            if len(synsets_of_noun) == 0:
                continue
            noun_total = 0
            for synset_of_noun in synsets_of_noun:
                p = WordnetHandler.cal_similarity(synset_wn, synset_of_noun)
                # Other callers in this file guard against a None similarity;
                # treat it as "no contribution" instead of raising TypeError.
                if p is not None:
                    noun_total += p
            # float() forces true division regardless of Python version.
            total += noun_total / float(len(synsets_of_noun))

        score = total / float(len(nouns))
        if score > best_score:
            # Track the running maximum; without this update the function
            # would return the last candidate scoring above 0, not the best.
            best_score = score
            best_synset = synset_wn
    return best_synset
def sim_2_word(word_1, word_2):
    """Return a similarity score between two words based on their synsets.

    The strategy is selected by the module-level ``__WSD_type__``:

    * ``1`` -- the largest ``WordnetHandler.cal_similarity`` value over every
      pair of synsets of the two words (never below the initial 0).
    * ``0`` -- the similarity of the first synset of each word, returned
      exactly as ``cal_similarity`` produced it; 0 if either word has no
      synsets.

    Any other value of ``__WSD_type__`` yields 0.
    """
    senses_a = synsets_for_word(word_1)
    senses_b = synsets_for_word(word_2)
    best = 0

    if __WSD_type__ == 1:
        # Lazily walk the full cross product in the same order as a nested loop.
        pair_scores = (
            WordnetHandler.cal_similarity(sense_a, sense_b)
            for sense_a in senses_a
            for sense_b in senses_b
        )
        for score in pair_scores:
            if score > best:
                best = score

    if __WSD_type__ == 0:
        if len(senses_a) > 0 and len(senses_b) > 0:
            best = WordnetHandler.cal_similarity(senses_a[0], senses_b[0])
        else:
            return 0

    return best
def sim_for_synset_and_synsetvector(a_synset, vector):
    """Return the best similarity between ``a_synset`` and any synset in ``vector``.

    ``vector`` is an iterable of ``(synset, weight)`` pairs.  The weight is
    unpacked but currently not applied to the score.  Similarities reported
    as ``None`` by ``WordnetHandler.cal_similarity`` are skipped.  The result
    is never below the starting value 0, which is also what an empty
    ``vector`` yields.
    """
    best = 0
    for candidate, _weight in vector:
        score = WordnetHandler.cal_similarity(a_synset, candidate)
        if score is None:
            continue
        # max() keeps the current best on ties, matching a strict ">" update.
        best = max(best, score)
    return best
def wordnet_based_synset(syn_wn, sen_ox):
    """Score how similar ``syn_wn`` is to the words of the sentence ``sen_ox``.

    The sentence is tokenised with ``split_words``.  For each word, the best
    ``WordnetHandler.cal_similarity`` between any of its synsets (from
    ``synsets_for_word``) and ``syn_wn`` is taken.  Words whose best score is
    non-zero are counted and their scores summed.

    Returns:
        ``(0.0001 + sum of best scores) / (matched words + 0.001)``.  The
        small constants keep the denominator non-zero when no word matches.
    """
    total = 0.0001
    matched = 0
    for word in split_words(sen_ox):
        best = 0
        for candidate in synsets_for_word(word):
            score = WordnetHandler.cal_similarity(candidate, syn_wn)
            if score > best:
                best = score
        if best != 0:
            matched += 1
            total += best
    return total / (matched + 0.001)
def get_value_synset_for(cur_synset, synsets):
    """Score every synset in ``synsets`` against the definition of ``cur_synset``.

    The definition text (``cur_synset.definition()``) is turned into a word
    list by ``PreprocessDefinition.preprocess_sentence``.  Duplicates are not
    removed, so a repeated word is counted once per occurrence.  For each
    synset and each word, ``get_greatest_synset_similarity_between`` picks a
    matching synset for the word; when one is found, its
    ``WordnetHandler.cal_similarity`` to the synset is accumulated.

    Args:
        cur_synset: object exposing a ``definition()`` method.
        synsets: iterable of synsets to score.

    Returns:
        A list with one value per element of ``synsets``: the accumulated
        similarity divided by the number of words that produced a match, or
        0 when no word matched.
    """
    definition = cur_synset.definition()
    nouns = PreprocessDefinition.preprocess_sentence(definition)

    synsets_value = []
    for synset in synsets:
        count = 0
        p = 0
        for noun in nouns:
            # NOTE(review): the synset is passed bare here, whereas
            # get_definition_value_with_synsetwn passes ``[synset]`` to the
            # same helper -- confirm which form the helper expects.
            synset_max = get_greatest_synset_similarity_between(synset, noun)
            if synset_max is None:
                continue
            # A matched word counts toward the average even if its
            # similarity turns out to be undefined (None).
            count += 1
            sim = WordnetHandler.cal_similarity(synset, synset_max)
            if sim is not None:
                p += sim
        if count != 0:
            # float() forces true division regardless of Python version.
            p = p / float(count)
        synsets_value.append(p)
    return synsets_value
def get_definition_value_with_synsetwn(definition, synsets_wn):
    """Score every synset in ``synsets_wn`` against a definition string.

    ``definition`` is turned into a word list by
    ``PreprocessDefinition.preprocess_sentence``.  Duplicates are not
    removed, so a repeated word is counted once per occurrence.  For each
    synset and each word, ``get_greatest_synset_similarity_between`` picks a
    matching synset for the word; when one is found, its
    ``WordnetHandler.cal_similarity`` to the synset is accumulated.

    Args:
        definition: the definition text to compare against.
        synsets_wn: iterable of synsets to score.

    Returns:
        A list with one value per element of ``synsets_wn``: the accumulated
        similarity divided by the number of words that produced a match, or
        0 when no word matched.
    """
    nouns = PreprocessDefinition.preprocess_sentence(definition)

    synsets_value = []
    for synset in synsets_wn:
        count = 0
        p = 0
        for noun in nouns:
            # NOTE(review): the synset is wrapped in a list here, whereas
            # get_value_synset_for passes it bare to the same helper --
            # confirm which form the helper expects.
            synset_max = get_greatest_synset_similarity_between([synset], noun)
            if synset_max is None:
                continue
            # A matched word counts toward the average even if its
            # similarity turns out to be undefined (None).
            count += 1
            sim = WordnetHandler.cal_similarity(synset, synset_max)
            if sim is not None:
                p += sim
        if count != 0:
            # float() forces true division regardless of Python version.
            p = p / float(count)
        synsets_value.append(p)
    return synsets_value