def getBestLambda(lambda_list_to_update_tmp):
    '''
    Search for the lambda weights that give the best result on the dev set.

    Publishes ``lambda_list_to_update_tmp`` as the module-level
    ``lambda_list_to_update``, builds the dev entity-list tables and both
    dev score tables, then repeatedly evaluates the current weights with
    ``train_dev`` and advances them with ``update_list_lambda`` until that
    function returns ``None``.

    Args:
        lambda_list_to_update_tmp: value stored in the global
            ``lambda_list_to_update`` (presumably the weights that
            ``update_list_lambda`` is allowed to change - confirm there).

    Returns:
        A copy of the weights for which ``better_than`` last reported an
        improvement, or the object returned by ``init_lambda()`` when no
        evaluated weights beat ``init_result()``.
    '''
    global lambda_list_to_update
    lambda_list_to_update = lambda_list_to_update_tmp

    # Build the dev tables that the scoring/training steps below rely on.
    VtoE_model.createEntListTable_Spacy('dev')
    EtoV_model.createEntListTable_Spacy('dev')
    EtoV_model.createEntListTable_Stanford('dev')
    VtoE_model.createEntListTable_Stanford('dev')
    ScoreTable.createScoreTable_TypeInSens(dev_list_EtoV, dev_list_VtoE, 'dev')
    ScoreTable.createScoreTable_TypeSens(dev_list_EtoV, dev_list_VtoE, 'dev')

    list_lambda = init_lambda()
    # NOTE(review): this aliases the initial weights; if update_list_lambda
    # mutates its argument in place and nothing beats init_result(), the
    # returned object reflects the last mutation - confirm intended.
    best_lambda = list_lambda
    best_res = init_result()
    step = 0.1
    # Identity test: None is the sentinel that ends the search.
    while list_lambda is not None:
        print('List Lambda ', list_lambda)
        cur_res = train_dev(list_lambda)
        if better_than(cur_res, best_res):
            # Snapshot the weights so later updates cannot alter the best.
            best_lambda = dict(list_lambda.items())
            best_res = cur_res
        list_lambda = update_list_lambda(list_lambda, step)
    print('BestRes ', best_res)
    print('BestLambda ', best_lambda)

    return best_lambda
Esempio n. 2
0
def createCandidateSet(EtoV_List, VtoE_List, mode):
    '''
    Create the CandidateSet file for ``mode``, or load it if it exists.

    The Stanford and Spacy entity-list tables are (re)created for ``mode``
    first. If the candidate-set file for ``mode`` already exists it is
    parsed as JSON and returned; otherwise one candidate set is computed
    per sentence pair with ``getCandidateSet`` and the table is written to
    that file as JSON.

    Args:
        EtoV_List: per-sentence entries passed to ``getCandidateSet`` as
            its first argument.
        VtoE_List: per-sentence entries, indexed in lockstep with
            ``EtoV_List``.
        mode: ``'dev'`` or ``'test'``; selects the cache file.

    Returns:
        The candidate-set table: one entry per sentence pair. When loaded
        from the cache, the entries are whatever ``json.load`` produces.

    Raises:
        ValueError: if ``mode`` is neither ``'dev'`` nor ``'test'``.
    '''

    EtoV_model.createEntListTable_Stanford(mode)
    VtoE_model.createEntListTable_Stanford(mode)
    EtoV_model.createEntListTable_Spacy(mode)
    VtoE_model.createEntListTable_Spacy(mode)
    print('Create Candidate Set')
    if mode == 'dev':
        candidate_set_file = candidate_set_dev_file
    elif mode == 'test':
        candidate_set_file = candidate_set_test_file
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on candidate_set_file.
        raise ValueError(
            "mode must be 'dev' or 'test', got {!r}".format(mode))

    if os.path.isfile(candidate_set_file):
        # Read with the same encoding used for writing, and close the
        # handle deterministically.
        with open(candidate_set_file, encoding='utf-8') as f:
            Candidate_Set_Table = json.load(f)
    else:
        # Index VtoE_List explicitly so a length mismatch still raises
        # instead of being silently truncated.
        Candidate_Set_Table = [
            getCandidateSet(EtoV_sent, VtoE_List[i], i)
            for i, EtoV_sent in enumerate(EtoV_List)
        ]
        with open(candidate_set_file, 'w', encoding='utf-8') as f:
            json.dump(Candidate_Set_Table, f)

    return Candidate_Set_Table
Esempio n. 3
0
def main(lambda_list_to_update):
    '''
    Tune the weights, then predict and evaluate on the test data.

    Prints ``lambda_list_to_update``, obtains the weights from
    ``training_TypeInSens.getBestLambda``, builds the test entity-list and
    score tables, computes the final predicted pairs, and prints the
    metrics returned by ``evaluate_TypeInSens`` and ``evaluate_TypeSens``.
    Nothing is returned.
    '''
    print(lambda_list_to_update)
    tuned_weights = training_TypeInSens.getBestLambda(lambda_list_to_update)

    split = 'test'

    # Entity-list tables for the test split.
    VtoE_model.createEntListTable_Spacy(split)
    EtoV_model.createEntListTable_Spacy(split)
    EtoV_model.createEntListTable_Stanford(split)
    VtoE_model.createEntListTable_Stanford(split)

    # Score tables for the test split.
    ScoreTable.createScoreTable_TypeInSens(
        test_list_EtoV, test_list_VtoE, split)
    ScoreTable.createScoreTable_TypeSens(
        test_list_EtoV, test_list_VtoE, split)
    print("Created Score Table")
    print(tuned_weights)

    predicted = getPredict.getFinalPredictNEPairList(
        test_list_EtoV,
        test_list_VtoE,
        tuned_weights,
        split,
        train_mode_InSens=True,
        train_mode_Sens=True,
    )
    gold = TrueSet.getFileTrueSet(test_file_en, test_file_vn)

    # Both metric sets are computed before either is printed.
    insensitive_metrics = evaluate_TypeInSens.getMetrics(
        predicted, gold, split)
    sensitive_metrics = evaluate_TypeSens.getMetrics(predicted, gold)
    print('Type-insensitive ', insensitive_metrics)
    print('Type-sensitive ', sensitive_metrics)
Esempio n. 4
0
def getCandidateSet(EtoV_sent, VtoE_sent, sent_index):
    '''
    Build the candidate set for a single sentence pair.

    Entity rows are read from both models for ``sent_index``. Each row
    contributes one entry to the ``en`` side and one to the ``vn`` side:
    an EtoV row gives ``(row[0], row[2], row[3])`` to ``en`` and
    ``(row[1], row[2], row[4])`` to ``vn``; a VtoE row gives the reverse.
    Every ``en`` entry is then paired with every ``vn`` entry as
    ``(en[0], vn[0], en[1], en[2], vn[2], vn[1])`` and the result is
    de-duplicated with ``utilities.make_unique``.
    '''

    forward_rows = EtoV_model.getEntSetFromFile(
        EtoV_sent['Source'], EtoV_sent['Target'], sent_index)
    backward_rows = VtoE_model.getEntSetFromFile(
        VtoE_sent['Source'], VtoE_sent['Target'], sent_index)

    en_side = []
    vn_side = []
    # EtoV rows: index 0 feeds the en side, index 1 the vn side.
    for row in forward_rows:
        vn_side.append((row[1], row[2], row[4]))
        en_side.append((row[0], row[2], row[3]))
    # VtoE rows: the two sides are swapped.
    for row in backward_rows:
        en_side.append((row[1], row[2], row[4]))
        vn_side.append((row[0], row[2], row[3]))

    # Full cross product, en-major, same ordering as nested loops.
    combined = [
        (en[0], vn[0], en[1], en[2], vn[2], vn[1])
        for en in en_side
        for vn in vn_side
    ]
    return utilities.make_unique(combined)
Esempio n. 5
0
def _report_mismatch(true_pair, pair, mode):
    '''
    Print debugging details for a prediction whose en span matched the
    true pair but whose vi span did not.
    '''
    # pair[-2] is the key used for every lookup below (presumably the
    # sentence index - confirm against the producer of ``pair``).
    lookup_key = pair[-2]
    candidates = CandidateSet.getCandidateSetFromFile(lookup_key, mode)
    for position, candidate in enumerate(candidates):
        print('#####################')
        print('Candidate', candidate)
        print('Score',
              ScoreTable.getScoreforOneCandidate_TypeInSens(
                  lookup_key, position))

    print('--------------')
    print('True', true_pair)
    print('Predict', pair)
    print('Score',
          ScoreTable.getScoreforOneCandidate_TypeInSens(lookup_key, pair[-1]))
    print('Spacy Vi', VtoE_model.getEntList_Spacy_FromFile(lookup_key))
    print('Spacy En', EtoV_model.getEntList_Spacy_FromFile(lookup_key))
    print('Stanford En',
          EtoV_model.getEntList_StanfordNER_FromFile(lookup_key))
    print('Stanford Vi',
          VtoE_model.getEntList_StanfordNER_FromFile(lookup_key))


def evaluateSentencePair(predict, true_set, mode):
    '''
    Count the predictions of one sentence pair that match the true set.

    ``predict`` is de-duplicated and sorted by its first field. For each
    true pair, every prediction whose en span (field 0) has the same first
    and last element is checked: if the first and last elements of its vi
    span (field 1) also match, it counts as a true positive; otherwise
    diagnostic information is printed. Predictions with an empty en or vi
    span are skipped.

    Returns the number of true positives.
    '''
    print('==================')
    ranked = sorted(utilities.make_unique(predict), key=lambda item: item[0])
    hits = 0

    for gold in true_set:
        gold_en_first = gold[0][0]
        gold_en_last = gold[0][-1]

        # Predictions whose en span starts where the true one does.
        same_start = [
            cand for cand in ranked
            if len(cand[0]) >= 1 and cand[0][0] == gold_en_first
        ]

        for cand in same_start:
            if cand[0][-1] != gold_en_last:
                continue
            if len(cand[1]) < 1:
                continue

            cand_vi_first = cand[1][0]
            cand_vi_last = cand[1][-1]
            gold_vi_first = gold[1][0]
            gold_vi_last = gold[1][-1]
            if gold_vi_first == cand_vi_first and gold_vi_last == cand_vi_last:
                hits += 1
            else:
                _report_mismatch(gold, cand, mode)

    return hits