def getBestLambda(lambda_list_to_update_tmp):
    '''
    Search for the lambda setting that scores best on the dev split.

    Stores ``lambda_list_to_update_tmp`` in the module-level global
    ``lambda_list_to_update``, builds the dev entity-list and score tables,
    then walks the sequence of lambda settings produced by
    ``init_lambda()`` / ``update_list_lambda()`` until the latter returns
    ``None``. Each setting is evaluated with ``train_dev`` and compared to
    the best result so far with ``better_than``.

    Returns a copy of the best lambda dict found, or ``None`` when
    ``init_lambda()`` itself returns ``None``.
    '''
    global lambda_list_to_update
    lambda_list_to_update = lambda_list_to_update_tmp

    VtoE_model.createEntListTable_Spacy('dev')
    EtoV_model.createEntListTable_Spacy('dev')
    EtoV_model.createEntListTable_Stanford('dev')
    VtoE_model.createEntListTable_Stanford('dev')
    # NOTE: candidate-set creation is not invoked here; only the score
    # tables are (re)built for the dev split.
    ScoreTable.createScoreTable_TypeInSens(dev_list_EtoV, dev_list_VtoE, 'dev')
    ScoreTable.createScoreTable_TypeSens(dev_list_EtoV, dev_list_VtoE, 'dev')

    list_lambda = init_lambda()
    # Snapshot the starting point instead of aliasing it: list_lambda is
    # handed to update_list_lambda() below, and if that helper mutates its
    # argument an aliased best_lambda would silently change with it.
    best_lambda = dict(list_lambda) if list_lambda is not None else None
    best_res = init_result()
    step = 0.1

    while list_lambda is not None:
        print('List Lambda ', list_lambda)
        cur_res = train_dev(list_lambda)
        if better_than(cur_res, best_res):
            best_lambda = dict(list_lambda)
            best_res = cur_res
        list_lambda = update_list_lambda(list_lambda, step)

    print('BestRes ', best_res)
    print('BestLambda ', best_lambda)
    return best_lambda
def createCandidateSet(EtoV_List, VtoE_List, mode):
    '''
    Create (or load) the candidate-set table for a data split.

    Builds the Stanford and Spacy entity-list tables for ``mode`` and then
    returns the candidate-set table. If the cache file for the split
    already exists it is loaded from JSON; otherwise one candidate set is
    computed per sentence pair with ``getCandidateSet`` and the table is
    written to the cache file.

    Parameters:
        EtoV_List: per-sentence inputs, indexed in parallel with VtoE_List.
        VtoE_List: per-sentence inputs, same length as EtoV_List.
        mode: 'dev' or 'test'; selects the cache file.

    Returns:
        A list with one candidate set per sentence pair.
        NOTE(review): a table loaded from the JSON cache holds lists where
        a freshly built one may hold tuples - confirm callers accept both.

    Raises:
        ValueError: if ``mode`` is neither 'dev' nor 'test'.
    '''
    EtoV_model.createEntListTable_Stanford(mode)
    VtoE_model.createEntListTable_Stanford(mode)
    EtoV_model.createEntListTable_Spacy(mode)
    VtoE_model.createEntListTable_Spacy(mode)
    print('Create Candidate Set')

    if mode == 'dev':
        candidate_set_file = candidate_set_dev_file
    elif mode == 'test':
        candidate_set_file = candidate_set_test_file
    else:
        # Previously fell through and failed later with UnboundLocalError.
        raise ValueError(
            "mode must be 'dev' or 'test', got {!r}".format(mode))

    if os.path.isfile(candidate_set_file):
        # Read with the same encoding used for writing, and close the
        # handle deterministically.
        with open(candidate_set_file, encoding='utf-8') as f:
            Candidate_Set_Table = json.load(f)
    else:
        Candidate_Set_Table = [
            getCandidateSet(etov_sent, VtoE_List[i], i)
            for i, etov_sent in enumerate(EtoV_List)
        ]
        with open(candidate_set_file, 'w', encoding='utf-8') as f:
            json.dump(Candidate_Set_Table, f)
    return Candidate_Set_Table
def main(lambda_list_to_update):
    '''
    Tune the lambda weights, then score and evaluate the test split.

    ``lambda_list_to_update`` is printed and forwarded unchanged to
    ``training_TypeInSens.getBestLambda``. The weights returned from that
    call are used to produce predictions for the test split, which are
    compared against the true set with both the type-insensitive and the
    type-sensitive evaluators. Both metric results are printed; nothing is
    returned.
    '''
    print(lambda_list_to_update)
    tuned_weights = training_TypeInSens.getBestLambda(lambda_list_to_update)

    split = 'test'

    # Entity-list tables for the test split (Spacy first, then Stanford).
    VtoE_model.createEntListTable_Spacy(split)
    EtoV_model.createEntListTable_Spacy(split)
    EtoV_model.createEntListTable_Stanford(split)
    VtoE_model.createEntListTable_Stanford(split)

    # Score tables for both evaluation flavours.
    ScoreTable.createScoreTable_TypeInSens(
        test_list_EtoV, test_list_VtoE, split)
    ScoreTable.createScoreTable_TypeSens(
        test_list_EtoV, test_list_VtoE, split)
    print("Created Score Table")
    print(tuned_weights)

    predictions = getPredict.getFinalPredictNEPairList(
        test_list_EtoV,
        test_list_VtoE,
        tuned_weights,
        split,
        train_mode_InSens=True,
        train_mode_Sens=True,
    )
    gold = TrueSet.getFileTrueSet(test_file_en, test_file_vn)

    insensitive_metrics = evaluate_TypeInSens.getMetrics(
        predictions, gold, split)
    sensitive_metrics = evaluate_TypeSens.getMetrics(predictions, gold)
    print('Type-insensitive ', insensitive_metrics)
    print('Type-sensitive ', sensitive_metrics)
def getCandidateSet(EtoV_sent, VtoE_sent, sent_index):
    '''
    Build the candidate set for a single sentence pair.

    Entity pairs are fetched from both models with ``getEntSetFromFile``.
    From every fetched pair, fields (0, 2, 3) describe the entity on that
    model's source side and fields (1, 2, 4) the entity on its target
    side; these are pooled into one English list and one Vietnamese list.
    Every English entry is then combined with every Vietnamese entry and
    the combinations are de-duplicated with ``utilities.make_unique``.

    Each combination is laid out as
    ``(en[0], vn[0], en[1], en[2], vn[2], vn[1])``.
    '''
    etov_pairs = EtoV_model.getEntSetFromFile(
        EtoV_sent['Source'], EtoV_sent['Target'], sent_index)
    vtoe_pairs = VtoE_model.getEntSetFromFile(
        VtoE_sent['Source'], VtoE_sent['Target'], sent_index)

    english, vietnamese = [], []
    # (pairs, list for the source-side entity, list for the target-side one)
    plan = (
        (etov_pairs, english, vietnamese),
        (vtoe_pairs, vietnamese, english),
    )
    for pairs, source_side, target_side in plan:
        for item in pairs:
            target_side.append((item[1], item[2], item[4]))
            source_side.append((item[0], item[2], item[3]))

    combos = [
        (en[0], vn[0], en[1], en[2], vn[2], vn[1])
        for en in english
        for vn in vietnamese
    ]
    return utilities.make_unique(combos)
def evaluateSentencePair(predict, true_set, mode):
    '''
    Count the true positives for one sentence pair.

    ``predict`` is de-duplicated and sorted by its first field. For every
    entry of ``true_set``, a prediction counts as a true positive when the
    first and last elements of its field 0 (English side) and of its
    field 1 (Vietnamese side) equal those of the true entry. Predictions
    whose field 0 or field 1 is empty are skipped.

    When the English side matches but the Vietnamese side does not, a
    debug dump is printed: every stored candidate with its score, the true
    entry, the prediction with its score, and the Spacy / Stanford entity
    lists. The dump looks these up via ``pair[-2]`` and ``pair[-1]``
    (presumably sentence index and candidate index - TODO confirm).

    Returns the number of true positives.
    '''
    print('==================')
    hits = 0
    ranked = sorted(utilities.make_unique(predict), key=lambda item: item[0])

    for gold in true_set:
        gold_en_first = gold[0][0]
        gold_en_last = gold[0][-1]

        # Predictions whose English span starts where the true one starts.
        same_start = [
            cand for cand in ranked
            if len(cand[0]) >= 1 and cand[0][0] == gold_en_first
        ]

        for cand in same_start:
            if cand[0][-1] != gold_en_last:
                continue
            if len(cand[1]) < 1:
                continue

            cand_vi_first = cand[1][0]
            cand_vi_last = cand[1][-1]
            gold_vi_first = gold[1][0]
            gold_vi_last = gold[1][-1]
            if gold_vi_first == cand_vi_first and gold_vi_last == cand_vi_last:
                hits += 1
                continue

            # English side matched, Vietnamese side did not: dump context.
            stored = CandidateSet.getCandidateSetFromFile(cand[-2], mode)
            for idx, entry in enumerate(stored):
                print('#####################')
                print('Candidate', entry)
                print('Score',
                      ScoreTable.getScoreforOneCandidate_TypeInSens(
                          cand[-2], idx))
            print('--------------')
            print('True', gold)
            print('Predict', cand)
            print('Score',
                  ScoreTable.getScoreforOneCandidate_TypeInSens(
                      cand[-2], cand[-1]))
            print('Spacy Vi', VtoE_model.getEntList_Spacy_FromFile(cand[-2]))
            print('Spacy En', EtoV_model.getEntList_Spacy_FromFile(cand[-2]))
            print('Stanford En',
                  EtoV_model.getEntList_StanfordNER_FromFile(cand[-2]))
            print('Stanford Vi',
                  VtoE_model.getEntList_StanfordNER_FromFile(cand[-2]))

    return hits