def write_sens_for_reading(syns_wn, syns_ox, filename_output):
    """Write one tab-separated line per (WordNet, Oxford) definition pair.

    syns_wn -- sequence whose items expose a ``definition()`` method.
    syns_ox -- mapping looked up with the string keys "0", "1", ...; every
               entry must carry its definition text under the key "d".
    filename_output -- forwarded unchanged to
               FileProcess.append_value_to_file together with each line.

    Pairs are emitted WordNet-major: all Oxford entries for the first
    WordNet item, then all Oxford entries for the second, and so on.
    """
    index_pairs = (
        (wn_index, ox_index)
        for wn_index in range(len(syns_wn))
        for ox_index in range(len(syns_ox))
    )
    for wn_index, ox_index in index_pairs:
        wn_text = syns_wn[wn_index].definition()
        ox_text = syns_ox[str(ox_index)]["d"]
        FileProcess.append_value_to_file(
            "\t".join((wn_text, ox_text)), filename_output
        )
def parse_ox_wn_defi_to_input(word):
    """Write every WordNet x Oxford definition pair of ``word``.

    The WordNet definitions come from
    WordnetHandler.get_definitions_for_word and the Oxford ones from
    OxfordParser.get_definitions_of_word_for_svm.  Each pair becomes the
    line "<wordnet definition>\\t<oxford definition>", which is passed to
    FileProcess.append_value_to_file with the module-level
    ``__filename_input_sen__`` target.
    """
    wn_definitions = WordnetHandler.get_definitions_for_word(word)
    ox_definitions = OxfordParser.get_definitions_of_word_for_svm(word)
    for wn_definition in wn_definitions:
        for ox_definition in ox_definitions:
            pair_line = "\t".join((wn_definition, ox_definition))
            FileProcess.append_value_to_file(pair_line, __filename_input_sen__)
def get_synset_gloss(synset, filename):
    """Write a gloss line for ``synset`` and, recursively, for its hyponyms.

    The gloss is built, in this order, from:
      * every lemma name with "_" replaced by a space, each followed by ". "
      * the synset definition followed by ". "
      * every example, each followed by "." (no space)

    The finished text is handed to FileProcess.append_value_to_file with
    ``filename``; afterwards the function calls itself for each entry of
    ``synset.hyponyms()``.
    """
    parts = [
        lemma.name().replace("_", " ") + ". "
        for lemma in synset.lemmas()
    ]
    parts.append(synset.definition() + ". ")
    parts.extend(example + "." for example in synset.examples())
    FileProcess.append_value_to_file("".join(parts), filename)

    # Depth-first descent: a child is fully processed before its sibling.
    for hyponym in synset.hyponyms():
        get_synset_gloss(hyponym, filename)
def create_input_sens_test(dict_ox):
    """Write test sentences for the words of ``dict_ox`` from "blockage" on.

    Iteration follows the order of ``dict_ox``; every word before the key
    "blockage" is skipped, as is every word whose ``dict_ox`` entry is empty.
    For the remaining words the WordNet and Oxford definitions are fetched
    through WordnetHandler / OxfordParser, and for each (WordNet, Oxford)
    pair the line

        <wordnet definition>\\t<oxford definition>\\t<context>

    is passed to FileProcess.append_value_to_file with
    ``__filename_input_sen_test__``.  ``<context>`` is the tab-joined list of
    all WordNet definitions when there is exactly one Oxford definition and
    more than one WordNet definition; otherwise it is the tab-joined list of
    all Oxford definitions.  Words with exactly one definition on both sides
    produce no output.
    """
    reached_start = False
    for word in dict_ox:
        # Latches on once the hard-coded start word has been seen.
        reached_start = reached_start or word == "blockage"
        if not reached_start or len(dict_ox[word]) == 0:
            continue

        wn_definitions = WordnetHandler.get_definitions_for_word(word)
        ox_definitions = OxfordParser.get_definitions_of_word_for_svm(word)
        ox_count = len(ox_definitions)
        wn_count = len(wn_definitions)

        if ox_count == 1 and wn_count == 1:
            continue

        if ox_count == 1 and wn_count > 1:
            context = "\t".join(wn_definitions)
        else:
            context = "\t".join(ox_definitions)

        for wn_definition in wn_definitions:
            for ox_definition in ox_definitions:
                sentence = "\t".join((wn_definition, ox_definition, context))
                FileProcess.append_value_to_file(
                    sentence, __filename_input_sen_test__
                )
def cal_features_from_sens_write_to_file(filename_sens, filename_output):
    """Compute the feature values of every sentence line and write them out.

    filename_sens   -- path of a text file with one tab-separated record per
                       line; field 0 and field 1 are the two sentences to
                       compare, and the whole field list is passed to the
                       feature functions as context.
    filename_output -- forwarded to FileProcess.append_value_to_file with
                       each result line.

    For each record the output line is
    "<Literal.levenshtein_in_context>\\t<WordnetBased.wordnet_based_in_context(..., 0)>".

    Blank lines are skipped.  A non-blank line without a tab still raises
    IndexError so malformed input is not silently ignored.

    NOTE: the line terminator is not stripped, so the last field of each
    record still ends with it, exactly as before.
    """
    # `with` guarantees the input file is closed even when a feature
    # function raises half-way through the file.
    with open(filename_sens, 'r') as sens_file:
        for line in sens_file:
            # The old `len(line) > 0` guard could never be false inside the
            # read loop (a blank line is "\n"), so blank lines crashed below.
            if not line.strip():
                continue

            sens = line.split("\t")
            sen_1 = sens[0]
            sen_2 = sens[1]

            # Two further features (ShallowSyntactic.jaccard_POS_in_context
            # and wordnet_based_in_context with mode 1) were disabled in the
            # original and remain disabled here.
            feature_values = "\t".join((
                str(Literal.levenshtein_in_context(sen_1, sen_2, sens)),
                str(WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 0)),
            ))
            FileProcess.append_value_to_file(feature_values, filename_output)
def create_input_sen_via_ox_vn(dict_vn, dict_ox):
    """Write sentence pairs and 0/1 labels derived from "tv" similarity.

    dict_vn, dict_ox -- mappings word -> {sense key -> sense dict}.  A sense
    dict holds its definition under 'd' and, optionally, a value under 'tv'.

    For every word present in both dictionaries (and with at least one
    Oxford sense) each VN sense is compared, via ``check_tv_similar``, with
    every Oxford sense that has a 'tv' entry:

      * a comparison returning 1 writes the pair with label "1";
      * any other result writes the pair with label "0", but only once a
        "1" pair has already been written for the current VN sense;
        earlier non-matching Oxford senses are dropped.

    Each sentence line is
    "<vn definition>\\t<ox definition>\\t<all ox definitions, tab-joined>"
    and goes to ``__filename_input_sen__``; the label goes to
    ``__filename_input_gs__`` (both through FileProcess.append_value_to_file).

    A VN sense without a 'tv' entry raises KeyError as soon as it is
    compared with an Oxford sense.
    """
    for word in dict_ox:
        word_syns_ox = dict_ox[word]
        if len(word_syns_ox) == 0 or word not in dict_vn:
            continue
        word_syns_vn = dict_vn[word]

        # Fix: the original tested len(word_syns_ox) twice, so any word with
        # a single Oxford sense was skipped regardless of the VN side.  Only
        # the one-to-one case is skipped, as in create_input_sen_via_gold_data.
        if len(word_syns_ox) == 1 and len(word_syns_vn) == 1:
            continue

        # Only Oxford senses carrying 'tv' can be compared or used as context.
        comparable_syns_ox = [
            word_syns_ox[i_ox]
            for i_ox in word_syns_ox
            if "tv" in word_syns_ox[i_ox]
        ]
        # Fix: join without a trailing tab; see the separator note below.
        all_defi_ox = "\t".join(syn_ox['d'] for syn_ox in comparable_syns_ox)

        for i_vn in word_syns_vn:
            syn_vn = word_syns_vn[i_vn]
            flag_can_use = False
            for syn_ox in comparable_syns_ox:
                if check_tv_similar(syn_vn['tv'], syn_ox['tv']) == 1:
                    label = "1"
                    flag_can_use = True
                elif flag_can_use:
                    label = "0"
                else:
                    # No positive pair yet for this VN sense: drop the pair.
                    continue

                # Fix: the original glued the context directly onto the
                # Oxford definition (no tab), corrupting field 2 for readers
                # that split the line on tabs.
                value = "\t".join((syn_vn['d'], syn_ox['d'], all_defi_ox))
                FileProcess.append_value_to_file(value, __filename_input_sen__)
                FileProcess.append_value_to_file(label, __filename_input_gs__)
def _write_normalized_feature_rows(feature_rows, filename_output):
    """Divide every feature by its column's root value; write one line per row.

    feature_rows -- list of equally long feature-value sequences.  The root
    value of column i is root_values_of_a_feature_in_row(feature_rows, i).
    An empty list writes nothing.
    """
    if not feature_rows:
        return

    root_values = [
        root_values_of_a_feature_in_row(feature_rows, i_feature)
        for i_feature in range(len(feature_rows[0]))
    ]
    for feature_row in feature_rows:
        normalized = [
            str(feature_value / root_values[i_feature])
            for i_feature, feature_value in enumerate(feature_row)
        ]
        FileProcess.append_value_to_file("\t".join(normalized), filename_output)


def cal_features_and_write_to_file_for(syns_wn, syns_ox, filename_output):
    """Write normalised feature lines for every (WordNet, Oxford) sense pair.

    syns_wn -- sequence of WordNet senses.
    syns_ox -- mapping of Oxford senses keyed by the strings "0", "1", ...
    filename_output -- forwarded to FileProcess.append_value_to_file.

    Raw features come from ``cal_feature_values_for(syn_wn, syn_ox)``.  Each
    value is divided by the root value of its feature column within a group
    of rows, and every row becomes one tab-separated output line:

      * exactly one Oxford sense and several WordNet senses: the group is
        all WordNet senses against that single Oxford sense;
      * otherwise: one group per WordNet sense, holding all Oxford senses.

    When there are no Oxford senses nothing is written (this used to fail
    with IndexError).  With plain Python numbers a root value of zero raises
    ZeroDivisionError.
    """
    if len(syns_ox) == 1 and len(syns_wn) > 1:
        only_syn_ox = syns_ox[str(0)]
        feature_rows = [
            cal_feature_values_for(syn_wn, only_syn_ox) for syn_wn in syns_wn
        ]
        _write_normalized_feature_rows(feature_rows, filename_output)
        return

    for syn_wn in syns_wn:
        feature_rows = [
            cal_feature_values_for(syn_wn, syns_ox[str(i_ox)])
            for i_ox in range(len(syns_ox))
        ]
        _write_normalized_feature_rows(feature_rows, filename_output)
def write_label_for_svm(syns_wn, syns_ox, dict_gold):
    """Write the gold label of every (WordNet index, Oxford index) pair.

    Only the lengths of ``syns_wn`` and ``syns_ox`` are used.  For each index
    pair, WordNet-major, ``dict_gold[wn_index][ox_index]`` is passed to
    FileProcess.append_value_to_file with ``__filename_input_gs_train__``.
    """
    index_pairs = (
        (wn_index, ox_index)
        for wn_index in range(len(syns_wn))
        for ox_index in range(len(syns_ox))
    )
    for wn_index, ox_index in index_pairs:
        gold_label = dict_gold[wn_index][ox_index]
        FileProcess.append_value_to_file(gold_label, __filename_input_gs_train__)
def create_input_sen_via_gold_data(dict_vn, dict_ox, dict_gold):
    """Write sentence pairs with labels taken from the gold annotations.

    dict_vn, dict_ox -- mappings word -> {sense key -> sense dict}; a sense
    dict holds its definition under 'd' and optionally a value under 'tv'.
    dict_gold -- word -> structure indexed as [int(vn key)][int(ox key)];
    the string "1" marks a matching pair.

    Words are visited in ``dict_ox`` order.  Words with no Oxford senses or
    without gold data are skipped, and processing stops completely at the
    first remaining word equal to "blockage".  Words absent from ``dict_vn``
    and words with exactly one sense on both sides produce no output.

    For every VN sense and every Oxford sense that has a 'tv' entry, the line

        <vn definition>\\t<ox definition>\\t<context>

    is written to ``__filename_input_sen__`` and "1" or "0" to
    ``__filename_input_gs__`` (both through FileProcess.append_value_to_file).
    ``<context>`` is the tab-joined definitions of the senses that have 'tv':
    the VN senses when there is one Oxford sense and several VN senses,
    otherwise the Oxford senses.
    """
    for word in dict_ox:
        if len(dict_ox[word]) == 0 or word not in dict_gold:
            continue
        if word == "blockage":
            # Hard-coded end marker: nothing from this word on is processed.
            return
        if word not in dict_vn:
            continue

        senses_vn = dict_vn[word]
        senses_ox = dict_ox[word]
        count_ox = len(senses_ox)
        count_vn = len(senses_vn)
        if count_ox == 1 and count_vn == 1:
            continue

        if count_ox == 1 and count_vn > 1:
            context_senses = senses_vn
        else:
            context_senses = senses_ox
        context = "\t".join(
            context_senses[key]['d']
            for key in context_senses
            if "tv" in context_senses[key]
        )

        for key_vn in senses_vn:
            sense_vn = senses_vn[key_vn]
            for key_ox in senses_ox:
                sense_ox = senses_ox[key_ox]
                if "tv" not in sense_ox:
                    continue
                sentence = "\t".join((sense_vn['d'], sense_ox['d'], context))
                gold = dict_gold[word][int(key_vn)][int(key_ox)]
                label = "1" if gold == "1" else "0"
                FileProcess.append_value_to_file(sentence, __filename_input_sen__)
                FileProcess.append_value_to_file(label, __filename_input_gs__)