def create_input_for_train():
    """Generate the training sentence input from VietNet, Oxford and gold data.

    Loads the VietNet dictionary, the Oxford noun dictionary and the gold
    data exposed as ``CompareWithGold.goldData``, then hands all three to
    ``create_input_sen_via_gold_data``.
    """
    vietnet_entries = ReadVietNet.readVietNetFile()
    oxford_nouns = OxfordParser.get_dict_nouns()
    gold_entries = CompareWithGold.goldData
    # NOTE: an earlier variant called create_input_sen_via_ox_vn(dict_vn, dict_ox)
    # here instead; the gold-data based generator is the one in use.
    create_input_sen_via_gold_data(vietnet_entries, oxford_nouns, gold_entries)
def sim_ox_wn_via_definition_cal_word():
    """Evaluate the definition-based similarity, averaging metrics per word.

    For every word of the Oxford noun dictionary the similarity matrix is
    read from the module-level ``__m2d_sim__`` memo (filled through
    ``sim_ox_wn_definition`` on a miss), passed through ``choose_pair_0_1``
    and scored with ``CompareWithGold.compareGoldWithResult``.  Words for
    which the comparison reports a precision of ``-1`` do not contribute.

    The averaged precision, recall, F-score and accuracy are printed and
    appended through ``Parameters.append_result_to_file``.  When no word
    contributes, every metric is reported as ``0.0`` instead of raising
    ``ZeroDivisionError``.

    Returns:
        None.
    """
    # Float accumulators: the averages below must not fall back to integer
    # (floor) division under Python 2 if per-word scores are integers.
    total_precision = 0.0
    total_recall = 0.0
    total_accuracy = 0.0
    total_word = 0

    dict_ox = OxfordParser.get_dict_nouns()
    for word in dict_ox:
        if word not in __m2d_sim__:
            __m2d_sim__[word] = sim_ox_wn_definition(word)
        # Work on a copy so the memoised matrix is never modified.
        m2d_sim = copy.deepcopy(__m2d_sim__[word])
        if m2d_sim is None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0:
            continue

        print(word)

        # NOTE(review): the flattened original is ambiguous about whether this
        # call was commented out; it is kept active -- confirm.
        m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

        (precision, recall, accuracy) = CompareWithGold.compareGoldWithResult(m2d_sim, word)
        if precision != -1:
            total_precision += precision
            total_recall += recall
            total_accuracy += accuracy
            total_word += 1

    if total_word:
        precision = total_precision / total_word
        recall = total_recall / total_word
        accuracy = total_accuracy / total_word
    else:
        # Nothing was scored; avoid dividing by zero.
        precision = recall = accuracy = 0.0

    f_score = 0
    if precision + recall != 0:
        f_score = 2*(precision*recall)/(precision + recall)

    print("total:")
    print(total_word)
    print(precision)
    print(recall)
    print(f_score)
    print(accuracy)

    Parameters.append_result_to_file(precision, recall, f_score, accuracy)
def create_input_for_test_svm():
    """Write the SVM test inputs for the words of the Oxford noun dictionary.

    For each word, the Oxford entries and the synsets returned by
    ``WordnetHandler.get_synsets_for_word(word, 'n')`` are written with
    ``write_sens_for_reading`` (to ``__filename_input_sen_test__``) and
    ``cal_features_and_write_to_file_for`` (to
    ``__filename_input_test_feature_values__``).

    Skipped words:
        * words with no Oxford entries;
        * words with exactly one Oxford entry and exactly one synset.
    """
    dict_ox = OxfordParser.get_dict_nouns()
    for word in dict_ox:
        syns_ox = dict_ox[word]
        if len(syns_ox) == 0:
            continue

        syns_wn = WordnetHandler.get_synsets_for_word(word, 'n')

        # A 1x1 pairing is not written to the test input.
        if len(syns_ox) == 1 and len(syns_wn) == 1:
            continue

        write_sens_for_reading(syns_wn, syns_ox, __filename_input_sen_test__)
        cal_features_and_write_to_file_for(
            syns_wn, syns_ox, __filename_input_test_feature_values__)
def create_input_for_train_svm(stop_word="brook"):
    """Write the SVM training inputs for Oxford nouns that have gold data.

    For each eligible word, three outputs are produced in this order:
    labels via ``write_label_for_svm`` (using ``dict_gold[word]``), readable
    sentences via ``write_sens_for_reading`` (to
    ``__filename_input_sen_train__``) and feature values via
    ``cal_features_and_write_to_file_for`` (to
    ``__filename_input_train_feature_values__``).

    Skipped words:
        * words with no Oxford entries or absent from the gold data;
        * words with exactly one Oxford entry and exactly one synset.

    Args:
        stop_word: Generation stops (the function returns) as soon as this
            word is reached in iteration order.  Defaults to ``"brook"``,
            the previously hard-coded cutoff.  Pass ``None`` to process every
            word.  The check runs after the empty/not-in-gold filter, so a
            stop word that is filtered out never triggers the cutoff.
    """
    dict_ox = OxfordParser.get_dict_nouns()
    dict_gold = CompareWithGold.goldData

    for word in dict_ox:
        syns_ox = dict_ox[word]
        if len(syns_ox) == 0 or word not in dict_gold:
            continue

        if stop_word is not None and word == stop_word:
            return

        syns_wn = WordnetHandler.get_synsets_for_word(word, 'n')

        # A 1x1 pairing is not written to the training input.
        if len(syns_ox) == 1 and len(syns_wn) == 1:
            continue

        write_label_for_svm(syns_wn, syns_ox, dict_gold[word])
        write_sens_for_reading(syns_wn, syns_ox, __filename_input_sen_train__)
        cal_features_and_write_to_file_for(
            syns_wn, syns_ox, __filename_input_train_feature_values__)
def create_input_sens_for_test_svm():
    """Call ``parse_ox_wn_defi_to_input`` on each word of the Oxford noun dictionary."""
    for noun in OxfordParser.get_dict_nouns():
        parse_ox_wn_defi_to_input(noun)
def sim_ox_wn_via_svm():
    """Score the SVM-based Oxford/WordNet matching against the gold data.

    For each word of the Oxford noun dictionary a matrix with one row per
    WordNet synset and one column per Oxford entry is obtained: ``[[1]]``
    when both sides have exactly one element, otherwise the result of
    ``get_m2d_sim_for_word_from_svm_result`` (words for which that returns
    ``None`` are skipped).  The matrix goes through ``choose_pair_0_1`` and
    is compared with
    ``CompareWithGold.compareGoldWithResult_without_cal_result``; results
    whose ``tp`` is ``-1`` are not accumulated.

    Totals and the derived precision, recall, F-score and accuracy are
    printed and appended through ``Parameters.append_result_to_file``.

    Returns:
        tuple: ``(f_score, current_params)`` where ``current_params`` is a
        deep copy of ``Parameters.get_current_params()``.
    """
    # Counters start at a small epsilon so the ratios below cannot divide by
    # zero even if nothing is accumulated.
    total_tp = 0.00001
    total_tn = 0.00001
    total_fn = 0.00001
    total_fp = 0.00001
    total_pair = 0

    dict_ox = OxfordParser.get_dict_nouns()
    for word in dict_ox:
        word_syns_ox = dict_ox[word]
        wn_synsets = WordnetHandler.get_synsets_for_word(word, "n")

        if len(word_syns_ox) == 1 and len(wn_synsets) == 1:
            # One candidate on each side: the single cell is set to 1.
            m2d_sim = [[1]]
        else:
            m2d_sim = get_m2d_sim_for_word_from_svm_result(word)
            if m2d_sim is None:
                continue

        m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

        total_pair += count_pair(m2d_sim)

        (tp, tn, fn, fp) = CompareWithGold.compareGoldWithResult_without_cal_result(m2d_sim, word)
        if tp != -1:
            total_tp += tp
            total_tn += tn
            total_fn += fn
            total_fp += fp

    precision = total_tp / (total_tp + total_fp)
    recall = total_tp / (total_tp + total_fn)
    accuracy = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn)

    f_score = 0
    if precision != 0 or recall != 0:
        f_score = 2*(precision*recall)/(precision + recall)

    print("total:")
    print(total_pair)
    print(total_tp)
    print(total_tn)
    print(total_fn)
    print(total_fp)
    print(precision)
    print(recall)
    print(f_score)
    print(accuracy)

    Parameters.append_result_to_file(precision, recall, f_score, accuracy)

    # Return a snapshot so later parameter changes do not affect the caller.
    current_params = copy.deepcopy(Parameters.get_current_params())
    return f_score, current_params
def create_input_for_test():
    """Pass the Oxford noun dictionary to ``create_input_sens_test``."""
    create_input_sens_test(OxfordParser.get_dict_nouns())
def sim_ox_wn_via_definition_morpho_cal_syns():
    """Score definition similarity blended with n-gram similarity against gold.

    For each word of the Oxford noun dictionary:

    1. The definition similarity matrix is read from the module-level
       ``__m2d_sim__`` memo (filled through ``sim_ox_wn_definition`` on a
       miss) and deep-copied; ``None`` or empty matrices are skipped.
    2. An n-gram matrix is read from ``__dict_ngrams__`` or, on a miss,
       built as the equally weighted (0.25 each) sum of the matrices stored
       under the word in ``__m2d_sim_jacc__``, ``__m2d_sim_2grams__``,
       ``__m2d_sim_3grams__`` and ``__m2d_sim_4grams__``.
       NOTE(review): those four tables are presumably filled as a side
       effect of ``sim_ox_wn_definition`` -- confirm.
    3. Each cell becomes
       ``sim * (1 - JACCARD) + JACCARD * ngram`` with
       ``JACCARD = Parameters.MORPHO.JACCARD``.
    4. The matrix goes through ``choose_pair_0_1`` and is compared with
       ``CompareWithGold.compareGoldWithResult_without_cal_result``; results
       whose ``tp`` is ``-1`` are not accumulated.

    Totals and the derived precision, recall, F-score and accuracy are
    printed and appended through ``Parameters.append_result_to_file``.

    Returns:
        tuple: ``(f_score, current_params)`` where ``current_params`` is a
        deep copy of ``Parameters.get_current_params()``.
    """
    # Counters start at a small epsilon so the ratios below cannot divide by
    # zero even if nothing is accumulated.
    total_tp = 0.00001
    total_tn = 0.00001
    total_fn = 0.00001
    total_fp = 0.00001
    total_pair = 0

    # Blend weights of the four n-gram matrices; the 4-gram weight is whatever
    # remains so that the weights sum to 1.
    monogram_weight = 0.25
    bigram_weight = 0.25
    trigram_weight = 0.25
    fourgram_weight = 1 - monogram_weight - bigram_weight - trigram_weight

    dict_ox = OxfordParser.get_dict_nouns()
    for word in dict_ox:
        if word not in __m2d_sim__:
            __m2d_sim__[word] = sim_ox_wn_definition(word)
        # Work on a copy: the cells are overwritten below and the memoised
        # matrix must stay untouched.
        m2d_sim = copy.deepcopy(__m2d_sim__[word])
        if m2d_sim is None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0:
            continue

        n_rows = len(m2d_sim)
        n_cols = len(m2d_sim[0])

        if word not in __dict_ngrams__:
            m2d_jacc = __m2d_sim_jacc__[word]
            m2d_2grams = __m2d_sim_2grams__[word]
            m2d_3grams = __m2d_sim_3grams__[word]
            m2d_4grams = __m2d_sim_4grams__[word]

            m2d_ngrams = [[0 for _ in range(n_cols)] for _ in range(n_rows)]
            for i in range(n_rows):
                for j in range(n_cols):
                    m2d_ngrams[i][j] = m2d_jacc[i][j]*monogram_weight \
                                     + m2d_2grams[i][j]*bigram_weight \
                                     + m2d_3grams[i][j]*trigram_weight \
                                     + m2d_4grams[i][j]*fourgram_weight
            __dict_ngrams__[word] = m2d_ngrams

        m2d_ngrams = __dict_ngrams__[word]

        # Read per word, as in the original, in case the parameter changes
        # between iterations.
        jaccard_weight = Parameters.MORPHO.JACCARD
        definition_weight = 1 - jaccard_weight
        for i in range(n_rows):
            for j in range(n_cols):
                m2d_sim[i][j] = m2d_sim[i][j]*definition_weight + jaccard_weight*(m2d_ngrams[i][j])

        # NOTE(review): the flattened original is ambiguous about whether this
        # call was commented out; it is kept active -- confirm.
        m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

        total_pair += count_pair(m2d_sim)

        (tp, tn, fn, fp) = CompareWithGold.compareGoldWithResult_without_cal_result(m2d_sim, word)
        if tp != -1:
            total_tp += tp
            total_tn += tn
            total_fn += fn
            total_fp += fp

    precision = total_tp / (total_tp + total_fp)
    recall = total_tp / (total_tp + total_fn)
    accuracy = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn)

    f_score = 0
    if precision != 0 or recall != 0:
        f_score = 2*(precision*recall)/(precision + recall)

    print("total:")
    print(total_pair)
    print(total_tp)
    print(total_tn)
    print(total_fn)
    print(total_fp)
    print(precision)
    print(recall)
    print(f_score)
    print(accuracy)

    Parameters.append_result_to_file(precision, recall, f_score, accuracy)

    # Return a snapshot so later parameter changes do not affect the caller.
    current_params = copy.deepcopy(Parameters.get_current_params())
    return f_score, current_params
def sim_ox_wn_via_definition_cal_syns():
    """Score the definition-based similarity against gold, pooled over all pairs.

    For every word of the Oxford noun dictionary the similarity matrix is
    read from the module-level ``__m2d_sim__`` memo (filled through
    ``sim_ox_wn_definition`` on a miss), deep-copied, passed through
    ``choose_pair_0_1`` and compared with
    ``CompareWithGold.compareGoldWithResult_without_cal_result``.  ``None``
    or empty matrices are skipped, and results whose ``tp`` is ``-1`` are not
    accumulated.

    Totals and the derived precision, recall, F-score and accuracy are
    printed and appended through ``Parameters.append_result_to_file``.  The
    counters start at exactly zero here, so each ratio is reported as ``0.0``
    when its denominator is zero instead of raising ``ZeroDivisionError``.

    Returns:
        tuple: ``(f_score, current_params)`` where ``current_params`` is a
        deep copy of ``Parameters.get_current_params()``.
    """
    total_tp = 0.0
    total_tn = 0.0
    total_fn = 0.0
    total_fp = 0.0
    total_pair = 0

    dict_ox = OxfordParser.get_dict_nouns()
    for word in dict_ox:
        if word not in __m2d_sim__:
            __m2d_sim__[word] = sim_ox_wn_definition(word)
        # Work on a copy so the memoised matrix is never modified.
        m2d_sim = copy.deepcopy(__m2d_sim__[word])
        if m2d_sim is None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0:
            continue

        # NOTE(review): the flattened original is ambiguous about whether this
        # call was commented out; it is kept active -- confirm.
        m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0]))

        total_pair += count_pair(m2d_sim)

        (tp, tn, fn, fp) = CompareWithGold.compareGoldWithResult_without_cal_result(m2d_sim, word)
        if tp != -1:
            total_tp += tp
            total_tn += tn
            total_fn += fn
            total_fp += fp

    # Guard every ratio: with zero-initialised counters a denominator can be 0
    # (e.g. no word was scored, or no positive pair was predicted).
    predicted_positive = total_tp + total_fp
    actual_positive = total_tp + total_fn
    all_pairs = total_tp + total_tn + total_fp + total_fn

    precision = total_tp / predicted_positive if predicted_positive else 0.0
    recall = total_tp / actual_positive if actual_positive else 0.0
    accuracy = (total_tp + total_tn) / all_pairs if all_pairs else 0.0

    f_score = 0
    if precision + recall != 0:
        f_score = 2*(precision*recall)/(precision + recall)

    print("total:")
    print(total_pair)
    print(total_tp)
    print(total_tn)
    print(total_fn)
    print(total_fp)
    print(precision)
    print(recall)
    print(f_score)
    print(accuracy)

    Parameters.append_result_to_file(precision, recall, f_score, accuracy)

    # Return a snapshot so later parameter changes do not affect the caller.
    current_params = copy.deepcopy(Parameters.get_current_params())
    return f_score, current_params