def cal_features_from_sens_write_to_file(filename_sens, filename_output):
    """Compute sentence-pair features for every row of a tab-separated file.

    Each non-blank line of ``filename_sens`` is split on tabs. The first two
    fields are used as the sentence pair and the complete field list is
    forwarded to the feature helpers as their third argument. For every such
    line one row -- the ``Literal.levenshtein_in_context`` value and the
    ``WordnetBased.wordnet_based_in_context`` value (mode ``0``), joined by a
    tab -- is handed to ``FileProcess.append_value_to_file`` together with
    ``filename_output``.

    Args:
        filename_sens: Path of the tab-separated input file.
        filename_output: Path forwarded to
            ``FileProcess.append_value_to_file``.

    Raises:
        IndexError: If a non-blank line holds fewer than two tab-separated
            fields.
    """
    # The context manager releases the handle even when a feature helper
    # raises part-way through the file.
    with open(filename_sens, 'r') as f:
        for line in f:
            # Lines keep their terminator, so a blank line arrives as "\n"
            # and a plain length check never filters it out. Strip only the
            # terminator for the test; the line itself is left untouched.
            if not line.rstrip("\r\n"):
                continue

            # The last field still carries the line terminator; the helpers
            # receive the fields exactly as split.
            sens = line.split("\t")
            sen_1 = sens[0]
            sen_2 = sens[1]

            feature_values = "\t".join([
                str(Literal.levenshtein_in_context(sen_1, sen_2, sens)),
                # Disabled feature:
                # str(ShallowSyntactic.jaccard_POS_in_context(sen_1, sen_2, sens)),
                str(WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 0)),
                # Disabled feature (would need its own tab separator):
                # str(WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 1)),
            ])
            FileProcess.append_value_to_file(feature_values, filename_output)
def cal_feature_values_for(syn_wn, syn_ox):
    """Build the similarity feature vector for a WordNet/Oxford sense pair.

    The returned list holds six values, in this order:

    1. ``1 - Literal.levenshtein`` of the two definitions
    2. ``1.00001 - Literal.jaccard`` of the two definitions
    3. ``1 - Literal.levenshtein`` of the two glosses
    4. ``1.00001 - Literal.jaccard`` of the two glosses
    5. value 4 plus ``Ngrams.ngrams_word_for`` of the glosses for
       n = 2, 3, 4 and 5
    6. ``WordnetBased.wordnet_based`` of the two definitions, mode ``0``

    Args:
        syn_wn: Sense object accepted by the ``WordnetHandler`` getters.
        syn_ox: Sense object accepted by the ``OxfordParser`` getters.

    Returns:
        list: The six feature values described above.
    """
    defi_wn = WordnetHandler.get_defi_for_syn(syn_wn)
    defi_ox = OxfordParser.get_defi_for_syn(syn_ox)
    gloss_wn = WordnetHandler.get_gloss_for_syn(syn_wn)
    gloss_ox = OxfordParser.get_gloss_for_syn(syn_ox)

    # NOTE(review): the lemma, short-definition, example, collocation,
    # hypernym-definition and meronym-definition lookups that used to be
    # fetched here were never read by an active feature and are no longer
    # performed. This assumes those helpers are plain getters -- confirm.
    # Re-enabling one of the experiments listed below means restoring the
    # lookup it consumes.

    feature_values = []

    # Literal features on the definitions.
    feature_values.append(1 - Literal.levenshtein(defi_wn, defi_ox))
    feature_values.append(1.00001 - Literal.jaccard(defi_wn, defi_ox))

    # The same two literal features on the glosses.
    feature_values.append(1 - Literal.levenshtein(gloss_wn, gloss_ox))
    literal_jacc_value = 1.00001 - Literal.jaccard(gloss_wn, gloss_ox)
    feature_values.append(literal_jacc_value)

    # Gloss Jaccard value plus the word n-gram scores. The scores are added
    # one at a time, in increasing n, so the floating-point result matches a
    # step-by-step accumulation.
    literal_jacc_ngram = literal_jacc_value
    for n in (2, 3, 4, 5):
        literal_jacc_ngram += Ngrams.ngrams_word_for(gloss_wn, gloss_ox, n)
    feature_values.append(literal_jacc_ngram)

    # WordNet-based feature on the definitions.
    feature_values.append(WordnetBased.wordnet_based(defi_wn, defi_ox, 0))

    # Disabled experiments (previously kept as commented-out code; none of
    # them contributes to the returned vector):
    #   - Levenshtein + Jaccard sum for the definitions and for the glosses
    #   - gloss Levenshtein value plus the n-gram scores
    #   - Jelly.jaro_winkler on the glosses, added to the gloss Jaccard value
    #   - 1.00001 - Literal.jaccard on the examples
    #   - ShallowSyntactic.jaccard_POS / jaccard_POS_ngrams (n = 2..4) on
    #     the glosses
    #   - WordnetBased.wordnet_based on hypernym definition vs. Oxford
    #     definition, on the examples, on the glosses, and with mode 1
    #   - WordnetBased.wordnet_based_synset averaged over syn_wn.hyponyms()
    #     and over syn_wn.hypernyms()
    #   - LSA.sim_tfidf on the definitions, hypernym definition vs. Oxford
    #     definition, the glosses, lemma vs. short definition, and the
    #     examples
    return feature_values