def sim_ox_wn_via_definition_cal_word(): total_precision = 0; total_recall = 0; total_accuracy = 0; total_word = 0 dict_ox = OxfordParser.get_dict_nouns() for word in dict_ox: if word not in __m2d_sim__: m2d_sim = sim_ox_wn_definition(word) __m2d_sim__[word] = m2d_sim m2d_sim = copy.deepcopy(__m2d_sim__[word]) if m2d_sim == None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0: continue print word # # if len(m2d_sim) == 1 and len(m2d_sim[0]) == 1: # continue # m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0])) (precision, recall, accuracy) = CompareWithGold.compareGoldWithResult(m2d_sim,word) if precision != -1: total_precision += precision total_recall += recall total_accuracy += accuracy total_word += 1 precision = total_precision/total_word recall = total_recall/total_word f_score = 0 if precision != 0 or recall != 0: f_score = 2*(precision*recall)/(precision + recall) accuracy = total_accuracy/total_word print "total:" print total_word print precision print recall print f_score print accuracy Parameters.append_result_to_file( precision, recall, f_score, accuracy)
def train_sim_definition(): Parameters.reset_params_zero() (f_score, curr_params) = jaccard_train() Parameters.PARAMETERS_CHOICE_0_1.CHOICE_1_1_MIN = curr_params[0] Parameters.PARAMETERS_CHOICE_0_1.CHOICE_1_COL_MIN_FIRST = curr_params[1] Parameters.PARAMETERS_CHOICE_0_1.CHOICE_1_COL_RANGE_FIRST = curr_params[2] Parameters.PARAMETERS_CHOICE_0_1.CHOICE_N_N_MIN_FIRST = curr_params[3] Parameters.PARAMETERS_CHOICE_0_1.CHOICE_N_N_RANGE_FIRST = curr_params[4] Parameters.MORPHO.JACCARD = curr_params[5] # # Parameters.PARAMETERS_CHOICE_0_1.CHOICE_1_1_MIN = 0 # Parameters.PARAMETERS_CHOICE_0_1.CHOICE_1_COL_MIN_FIRST = 0.0 # Parameters.PARAMETERS_CHOICE_0_1.CHOICE_1_COL_RANGE_FIRST = 1.0 # Parameters.PARAMETERS_CHOICE_0_1.CHOICE_N_N_MIN_FIRST = 1 # Parameters.PARAMETERS_CHOICE_0_1.CHOICE_N_N_RANGE_FIRST = 1.25 # Parameters.MORPHO.JACCARD = 0.9 # sim_ox_wn_via_definition()
def sim_ox_wn_via_definition_morpho_cal_syns(): total_tp = 0.00001 total_tn = 0.00001; total_fn = 0.00001; total_fp = 0.00001; total_pair = 0 dict_ox = OxfordParser.get_dict_nouns() for word in dict_ox: # if word != 'blaze': # continue # if word not in __m2d_sim__: m2d_sim = sim_ox_wn_definition(word) __m2d_sim__[word] = m2d_sim m2d_sim = copy.deepcopy(__m2d_sim__[word]) if m2d_sim == None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0: continue # if len(m2d_sim) == 1 or len(m2d_sim[0]) == 1: # continue if word not in __dict_ngrams__: m2d_jacc = __m2d_sim_jacc__[word] m2d_2grams = __m2d_sim_2grams__[word] m2d_3grams = __m2d_sim_3grams__[word] m2d_4grams = __m2d_sim_4grams__[word] # DebugHandler.print_2d_matrix(m2d_jacc) # DebugHandler.print_2d_matrix(m2d_2grams) # DebugHandler.print_2d_matrix(m2d_3grams) # DebugHandler.print_2d_matrix(m2d_4grams) # m2d_ngrams = [[0 for x in range(len(m2d_sim[0]))] for x in range(len(m2d_sim))] monogram_weight = 0.25 bigram_weight = 0.25 trigram_weight = 0.25 for i in range(len(m2d_sim)): for j in range(len(m2d_sim[0])): m2d_ngrams[i][j] = m2d_jacc[i][j]*monogram_weight \ + m2d_2grams[i][j]*bigram_weight \ + m2d_3grams[i][j]*(trigram_weight) \ + m2d_4grams[i][j]*(1- monogram_weight - bigram_weight - trigram_weight) __dict_ngrams__[word] = m2d_ngrams # print word m2d_ngrams = __dict_ngrams__[word] # DebugHandler.print_2d_matrix(m2d_ngrams) # DebugHandler.print_2d_matrix(m2d_sim) # ngram_weight = 0.075 # for iWnWord in range(len(m2d_sim)): # for iDictWord in range(len(m2d_sim[0])): # jacc = m2d_jacc[iWnWord][iDictWord] # ngrams = m2d_ngrams[iWnWord][iDictWord] # m2d_jacc[iWnWord][iDictWord] = jacc*(1-ngram_weight) + ngrams*ngram_weight # JACCARD_WEIGHT = Parameters.MORPHO.JACCARD for i in range(len(m2d_sim)): for j in range(len(m2d_sim[0])): m2d_sim[i][j] = m2d_sim[i][j]*(1-JACCARD_WEIGHT) + JACCARD_WEIGHT*(m2d_ngrams[i][j]); # DebugHandler.print_2d_matrix(m2d_sim) # if len(m2d_sim) == 1 and len(m2d_sim[0]) == 1: # continue # m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0])) # m2d_sim = pair_0_1_reducing_m2d_sim(m2d_sim, len(m2d_sim), len(m2d_sim[0]), word) # print word # DebugHandler.print_2d_matrix(m2d_sim) pair = count_pair(m2d_sim) total_pair += pair (tp, tn, fn, fp) = CompareWithGold.compareGoldWithResult_without_cal_result(m2d_sim,word) # precision = tp / (tp + fp + 0.0001) # recall = tp / (tp + fn + 0.0001) # accuracy = (tp + tn) / (tp + tn + fp + fn + 0.0001) # # f_score = 0 # if precision != 0 or recall != 0: # f_score = 2*(precision*recall)/(precision + recall) # if f_score < 0.5: # print word # print f_score # print tp # print tn # print fn # print fp # if tp != -1: total_tp += tp total_tn += tn total_fn += fn total_fp += fp precision = total_tp / (total_tp + total_fp) recall = total_tp / (total_tp + total_fn) accuracy = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn) f_score = 0 if precision != 0 or recall != 0: f_score = 2*(precision*recall)/(precision + recall) print "total:" print total_pair print total_tp print total_tn print total_fn print total_fp print precision print recall print f_score print accuracy Parameters.append_result_to_file( precision, recall, f_score, accuracy) current_params = Parameters.get_current_params() current_params = copy.deepcopy(current_params) return f_score, current_params
def sim_ox_wn_via_definition_cal_syns(): total_tp = 0.; total_tn = 0.; total_fn = 0.0; total_fp = 0.0; total_pair = 0 dict_ox = OxfordParser.get_dict_nouns() for word in dict_ox: # if word != 'bank': # continue # if word not in __m2d_sim__: m2d_sim = sim_ox_wn_definition(word) __m2d_sim__[word] = m2d_sim m2d_sim = copy.deepcopy(__m2d_sim__[word]) if m2d_sim == None or len(m2d_sim) == 0 or len(m2d_sim[0]) == 0: continue # if len(m2d_sim) == 1 and len(m2d_sim[0]) == 1: # continue # m2d_sim = choose_pair_0_1(m2d_sim, len(m2d_sim), len(m2d_sim[0])) # m2d_sim = pair_0_1_reducing_m2d_sim(m2d_sim, len(m2d_sim), len(m2d_sim[0]), word) # print word pair = count_pair(m2d_sim) total_pair += pair (tp, tn, fn, fp) = CompareWithGold.compareGoldWithResult_without_cal_result(m2d_sim,word) if tp != -1: total_tp += tp total_tn += tn total_fn += fn total_fp += fp precision = total_tp / (total_tp + total_fp) recall = total_tp / (total_tp + total_fn) accuracy = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn) f_score = 0 if precision != 0 or recall != 0: f_score = 2*(precision*recall)/(precision + recall) print "total:" print total_pair print total_tp print total_tn print total_fn print total_fp print precision print recall print f_score print accuracy Parameters.append_result_to_file( precision, recall, f_score, accuracy) current_params = Parameters.get_current_params() current_params = copy.deepcopy(current_params) return f_score, current_params