def sim_ox_wn_defi_WDS_via_defi_of_curr_main_syn(word):
    """Score Oxford vectors against each WordNet synset vector of ``word``.

    For the i-th WordNet entry, the Oxford vectors are rebuilt by calling
    ``OxParseDefinition.get_dict_vectors_synsets_for_word`` with the i-th
    value from ``WordnetParseDefinition.get_vectors_defi_for_word``. Each
    resulting Oxford vector is then compared with the i-th WordNet synset
    vector through ``sim_2_vector``.

    ``cal_sim_ngrams(word)`` is invoked once before returning.

    Returns:
        A list of lists with one row per WordNet vector and one column per
        definition returned by ``OxfordParser.get_definitions_of_word``.
        Cells that are never written keep their initial value ``0``.
    """
    wn_synset_dict = WordnetParseDefinition.get_dict_vectors_synsets_for_word(word)
    _wn_keys, vectors_wn = Util.get_keys_values_of_dict(wn_synset_dict)

    wn_defi_dict = WordnetParseDefinition.get_vectors_defi_for_word(word)
    _wn_defi_keys, vectors_wn_defi = Util.get_keys_values_of_dict(wn_defi_dict)

    column_count = len(OxfordParser.get_definitions_of_word(word))
    m2d_sim = [[0] * column_count for _ in range(len(vectors_wn))]

    for row, wn_vector in enumerate(vectors_wn):
        # The two WordNet value lists are assumed to be index-aligned
        # (presumably same key order) -- TODO confirm against the parsers.
        wn_defi_vector = vectors_wn_defi[row]

        ox_dict = OxParseDefinition.get_dict_vectors_synsets_for_word(word, wn_defi_vector)
        _ox_keys, ox_vectors = Util.get_keys_values_of_dict(ox_dict)

        for col, ox_vector in enumerate(ox_vectors):
            m2d_sim[row][col] = sim_2_vector(ox_vector, wn_vector)

    cal_sim_ngrams(word)
    return m2d_sim
def sim_ox_wn_value_main_synsets(word):
    """Compare WordNet value vectors with Oxford definition-similarity columns.

    The definition-level matrix from ``sim_ox_wn_defi_WDS_via_main_syns`` is
    printed through ``DebugHandler.print_2d_matrix`` and then transposed so
    that each row belongs to one Oxford entry. Every such row is compared
    with every WordNet value vector using ``spatial.distance.cosine``.

    Note that ``scipy.spatial.distance.cosine`` is a *distance*
    (1 - cosine similarity), so smaller values mean closer vectors even
    though the result is named ``m2d_sim``.

    Side effects:
        Prints each WordNet value vector, then a blank separator, then each
        Oxford value vector to stdout (debug output kept from the original).

    Returns:
        A list of lists with one row per WordNet vector and one column per
        Oxford vector, holding the cosine distances described above.
    """
    dict_vectors_wn = WordnetParseDefinition.get_dict_vectors_value_for(word)
    synsets_wn = WordnetHandler.get_synsets_for_word(word, 'n')
    dict_vectors_ox = OxParseDefinition.get_vectors_value_for_word(word, synsets_wn)

    _keys_wn, vectors_wn = Util.get_keys_values_of_dict(dict_vectors_wn)
    _keys_ox, vectors_ox = Util.get_keys_values_of_dict(dict_vectors_ox)

    m2d_sim_defi_temp = sim_ox_wn_defi_WDS_via_main_syns(word)
    DebugHandler.print_2d_matrix(m2d_sim_defi_temp)

    wn_count = len(vectors_wn)
    ox_count = len(vectors_ox)

    # Transpose: the temp matrix is indexed [wordnet][oxford]; the cosine
    # step below needs one row per Oxford entry.
    m2d_sim_defi = [
        [m2d_sim_defi_temp[i][j] for i in range(wn_count)]
        for j in range(ox_count)
    ]

    m2d_sim = []
    for vector_wn in vectors_wn:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(vector_wn)
        m2d_sim.append(
            [spatial.distance.cosine(ox_row, vector_wn) for ox_row in m2d_sim_defi]
        )

    # NOTE(review): the original layout was lost; these debug prints are
    # assumed to run once after the main loop -- confirm.
    print("\n")
    for vector_ox in vectors_ox:
        print(vector_ox)

    return m2d_sim
def sim_ox_wn_defi_WDS_via_main_syns(word):
    """Return the WordNet/Oxford similarity matrix for ``word``.

    WordNet vectors come from
    ``WordnetParseDefinition.get_dict_vectors_synsets_for_word``; Oxford
    vectors come from ``OxParseDefinition.get_dict_vectors_synsets_for_word``
    called with the ``'n'`` synsets of the word. The matrix itself is
    whatever ``sim_wn_ox_vector(vectors_ox, vectors_wn)`` produces.
    """
    wn_dict = WordnetParseDefinition.get_dict_vectors_synsets_for_word(word)
    wn_synsets = WordnetHandler.get_synsets_for_word(word, 'n')
    ox_dict = OxParseDefinition.get_dict_vectors_synsets_for_word(word, wn_synsets)

    _wn_keys, wn_vectors = Util.get_keys_values_of_dict(wn_dict)
    _ox_keys, ox_vectors = Util.get_keys_values_of_dict(ox_dict)

    similarity = sim_wn_ox_vector(ox_vectors, wn_vectors)

    # NOTE(review): the original carried two disabled (commented-out) steps
    # here -- confirm they are meant to stay off:
    #   * cal_sim_ngrams(word)
    #   * a CSV export that prefixed every row with its WordNet key, built a
    #     header row of [word] + utf8-encoded Oxford keys, and called
    #     FileProcess.append_to_excel_file on
    #     'Results/vector_definition/' + word + '.csv'.
    return similarity
def sim_ox_wn_defi_WDS_via_1_main_syn(word):
    """Build the WordNet/Oxford similarity matrix for ``word``.

    The Oxford vectors are produced with the word's ``'n'`` synsets as the
    second argument; both vector dictionaries are flattened with
    ``Util.get_keys_values_of_dict`` and handed to ``sim_wn_ox_vector``.

    Returns:
        The result of ``sim_wn_ox_vector(vectors_ox, vectors_wn)``.
    """
    wordnet_vectors_by_key = WordnetParseDefinition.get_dict_vectors_synsets_for_word(word)
    oxford_vectors_by_key = OxParseDefinition.get_dict_vectors_synsets_for_word(
        word,
        WordnetHandler.get_synsets_for_word(word, 'n'),
    )

    # Only the value lists are needed; the keys are discarded.
    _, wordnet_vectors = Util.get_keys_values_of_dict(wordnet_vectors_by_key)
    _, oxford_vectors = Util.get_keys_values_of_dict(oxford_vectors_by_key)

    return sim_wn_ox_vector(oxford_vectors, wordnet_vectors)
def cal_sim_ngrams(word):
    """Run the n-gram and Jaccard gloss comparisons for ``word``.

    Glosses are fetched from ``WordnetParseDefinition.get_gloss_for_jacc``
    and ``OxfordParser.get_definitions_of_word_for_jacc``. The function
    first calls ``cal_n_grams_sim`` with n = 2, 3 and 4 on the raw glosses.
    It then removes every occurrence of ``word`` from each gloss (in place)
    and calls ``cal_jacc_sim`` on the stripped glosses.

    Nothing is returned; any output is produced by the helpers it calls.
    """
    gloss_by_key = WordnetParseDefinition.get_gloss_for_jacc(word)
    _keys, wn_glosses = Util.get_keys_values_of_dict(gloss_by_key)
    ox_glosses = OxfordParser.get_definitions_of_word_for_jacc(word)

    for n in (2, 3, 4):
        cal_n_grams_sim(word, wn_glosses, ox_glosses, n)

    # Strip the headword from both gloss lists, mutating them in place just
    # as the original index-based loops did.
    for glosses in (wn_glosses, ox_glosses):
        for position, gloss in enumerate(glosses):
            glosses[position] = gloss.replace(word, "")

    cal_jacc_sim(word, wn_glosses, ox_glosses)
def sim_ox_wn_defi_WDS_via_main_syns_for_reduce(synsets_wn, status_synsets_wn, word):
    """Build the similarity matrix using only the still-active synsets.

    Args:
        synsets_wn: Sequence of WordNet synsets; all of them are used to
            build the WordNet vectors.
        status_synsets_wn: Sequence indexed in parallel with ``synsets_wn``.
            A synset whose status equals ``0`` is left out of the list
            passed to the Oxford vector builder.
        word: The word whose Oxford vectors are requested.

    Returns:
        The result of
        ``sim_wn_ox_vector_reduce(vectors_ox, vectors_wn, status_synsets_wn)``.
    """
    wn_vector_dict = WordnetParseDefinition.get_dict_vectores_synsets_for_synsets(synsets_wn)

    # Keep the synsets whose status flag is non-zero; the input sequence
    # itself is not modified.
    kept_synsets = [
        synset
        for index, synset in enumerate(synsets_wn)
        if status_synsets_wn[index] != 0
    ]

    ox_vector_dict = OxParseDefinition.get_dict_vectors_synsets_for_word(word, kept_synsets)

    _, wn_vectors = Util.get_keys_values_of_dict(wn_vector_dict)
    _, ox_vectors = Util.get_keys_values_of_dict(ox_vector_dict)

    return sim_wn_ox_vector_reduce(ox_vectors, wn_vectors, status_synsets_wn)
def sim_ox_wn_defi_WDS_via_align_all(word):
    """Score each WordNet entry against each Oxford entry via alignment.

    For every WordNet word list, all Oxford word lists are pooled into one
    list and passed with the WordNet list to
    ``WordnetHandler.get_nearest_synsets_words_words_order``. Each Oxford
    word list is then aligned against that result with
    ``get_nearest_synsets_words_synsets_order`` and the pair is scored with
    ``sim_2_vector``.

    ``cal_sim_ngrams(word)`` is invoked once before returning.

    Returns:
        A list of lists with one row per WordNet key and one column per
        Oxford key.
    """
    wn_words_by_key = WordnetParseDefinition.get_dict_vectors_words_for_word(word)
    keys_wn, vectors_wn = Util.get_keys_values_of_dict(wn_words_by_key)
    ox_words_by_key = OxParseDefinition.get_dict_vectors_word_for_word(word)
    keys_ox, vectors_ox = Util.get_keys_values_of_dict(ox_words_by_key)

    ox_positions = range(len(keys_ox))
    m2d_sim = []

    for i in range(len(keys_wn)):
        vector_wn = vectors_wn[i]

        # A fresh pooled list is built for every row, as in the original,
        # so the helper never receives a list shared between rows.
        pooled_ox_words = []
        for j in ox_positions:
            pooled_ox_words.extend(vectors_ox[j])

        synsets_wn = WordnetHandler.get_nearest_synsets_words_words_order(
            vector_wn, pooled_ox_words)

        row = []
        for j in ox_positions:
            synsets_ox = WordnetHandler.get_nearest_synsets_words_synsets_order(
                vectors_ox[j], synsets_wn)
            row.append(sim_2_vector(synsets_wn, synsets_ox))
        m2d_sim.append(row)

    cal_sim_ngrams(word)
    return m2d_sim
def sim_ox_wn_defi_WDS_via_align(word):
    """Score each WordNet word list against each Oxford word list.

    Cell ``[i][j]`` is
    ``WordnetHandler.sim_for_words_words_no_order(vector_ox, vector_wn,
    synset_wn)``, where ``synset_wn`` is the i-th ``'n'`` synset of ``word``.

    ``cal_sim_ngrams(word)`` is invoked once before returning.

    Returns:
        A list of lists with one row per WordNet key and one column per
        Oxford key.
    """
    wn_words_by_key = WordnetParseDefinition.get_dict_vectors_words_for_word(word)
    keys_wn, vectors_wn = Util.get_keys_values_of_dict(wn_words_by_key)
    ox_words_by_key = OxParseDefinition.get_dict_vectors_word_for_word(word)
    keys_ox, vectors_ox = Util.get_keys_values_of_dict(ox_words_by_key)

    synsets_wn = WordnetHandler.get_synsets_for_word(word, 'n')

    ox_positions = range(len(keys_ox))
    m2d_sim = []
    for i in range(len(keys_wn)):
        vector_wn = vectors_wn[i]
        # Synsets are assumed to be index-aligned with the WordNet word
        # lists -- TODO confirm against WordnetParseDefinition.
        synset_wn = synsets_wn[i]

        # An earlier variant, left disabled in the original, aligned the two
        # word lists with get_nearest_synsets_words_words_noorder and scored
        # the aligned pair with sim_2_vector instead.
        m2d_sim.append([
            WordnetHandler.sim_for_words_words_no_order(vectors_ox[j], vector_wn, synset_wn)
            for j in ox_positions
        ])

    cal_sim_ngrams(word)
    return m2d_sim