Esempio n. 1
0
def main(lambda_list_to_update):
    '''Evaluate the tuned lambda weights on the test split and print metrics.

    In order: obtains the best lambda weights from
    ``training_TypeInSens.getBestLambda``, builds the test candidate set and
    both score tables, computes the final predicted NE pairs, loads the true
    set from the test files, and prints the type-insensitive and
    type-sensitive evaluation results.

    Args:
        lambda_list_to_update: Echoed to stdout and forwarded unchanged to
            ``training_TypeInSens.getBestLambda``.
    '''
    print(lambda_list_to_update)
    best_weights = training_TypeInSens.getBestLambda(lambda_list_to_update)

    split = 'test'
    CandidateSet.createCandidateSet(test_list_EtoV, test_list_VtoE, split)
    print("Create Candidate Set")

    ScoreTable.createScoreTable_TypeInSens(test_list_EtoV, test_list_VtoE, split)
    ScoreTable.createScoreTable_TypeSens(test_list_EtoV, test_list_VtoE, split)
    print("Created Score Table")

    print(best_weights)
    predicted_pairs = getPredict.getFinalPredictNEPairList(
        test_list_EtoV,
        test_list_VtoE,
        best_weights,
        split,
        train_mode_InSens=True,
        train_mode_Sens=True,
    )
    gold_pairs = TrueSet.getFileTrueSet(test_file_en, test_file_vn)

    # Both evaluators receive the same prediction / true-set pair.
    insensitive_metrics = evaluate_TypeInSens.getMetrics(predicted_pairs, gold_pairs)
    sensitive_metrics = evaluate_TypeSens.getMetrics(predicted_pairs, gold_pairs)
    print('Type-insensitive ', insensitive_metrics)
    print('Type-sensitive ', sensitive_metrics)
def getBestLambda(lambda_list_to_update_tmp):
    '''Search the lambda weights on the dev split and return the best set.

    Builds the Spacy and Stanford entity-list tables and both score tables for
    the 'dev' split, then repeatedly scores the current weights with
    ``train_dev`` and advances them with ``update_list_lambda`` (step 0.1)
    until that helper returns ``None``. The weights whose result is judged
    best by ``better_than`` are kept.

    Args:
        lambda_list_to_update_tmp: Stored in the module-level global
            ``lambda_list_to_update`` before the search starts.

    Returns:
        A copy of the best weight mapping found, or ``None`` if
        ``init_lambda()`` returned ``None``.
    '''
    global lambda_list_to_update
    lambda_list_to_update = lambda_list_to_update_tmp

    VtoE_model.createEntListTable_Spacy('dev')
    EtoV_model.createEntListTable_Spacy('dev')
    EtoV_model.createEntListTable_Stanford('dev')
    VtoE_model.createEntListTable_Stanford('dev')
    # NOTE: the dev candidate set is not rebuilt here (the createCandidateSet
    # call was disabled) -- presumably it must already exist; verify.
    ScoreTable.createScoreTable_TypeInSens(dev_list_EtoV, dev_list_VtoE, 'dev')
    ScoreTable.createScoreTable_TypeSens(dev_list_EtoV, dev_list_VtoE, 'dev')

    step = 0.1
    list_lambda = init_lambda()
    # Snapshot the starting weights instead of aliasing them: if
    # update_list_lambda modifies its argument in place, an aliased
    # best_lambda would silently change along with it.
    best_lambda = None if list_lambda is None else dict(list_lambda)
    best_res = init_result()

    while list_lambda is not None:
        print('List Lambda ', list_lambda)
        cur_res = train_dev(list_lambda)
        if better_than(cur_res, best_res):
            best_lambda = dict(list_lambda)
            best_res = cur_res
        list_lambda = update_list_lambda(list_lambda, step)

    print('BestRes ', best_res)
    print('BestLambda ', best_lambda)

    return best_lambda
Esempio n. 3
0
def main():
    '''Build the dev score tables and run the distortion model on every candidate.

    For each sentence index of ``EtoV_dev_list`` the stored candidate set is
    read back with ``getCandidateSet.getCandidateSetFromFile`` and
    ``Distortion.getDistortionprob`` is called for each candidate together with
    the matching EtoV / VtoE entries. The returned values are not used.
    '''
    ScoreTable.createScoreTable_TypeSens(EtoV_dev_list, VtoE_dev_list, 'dev')
    ScoreTable.createScoreTable_TypeInSens(EtoV_dev_list, VtoE_dev_list, 'dev')

    for sent_idx, etov_sent in enumerate(EtoV_dev_list):
        for cand in getCandidateSet.getCandidateSetFromFile(sent_idx):
            Distortion.getDistortionprob(cand, etov_sent,
                                         VtoE_dev_list[sent_idx])
def main():
    '''Tune the lambda weights on the dev split and persist the best set.

    Builds both dev score tables, then repeatedly scores the current weights
    with ``train_dev`` and advances them with ``update_list_lambda`` (using
    the module-level ``lambda_step``) until that helper returns ``None``. The
    best weights according to ``better_than`` are printed and handed to
    ``config.WriteBestLambda_TypeSens``.
    '''
    # NOTE: the dev candidate set is not rebuilt here (the createCandidateSet
    # call was disabled) -- presumably it must already exist; verify.
    ScoreTable.createScoreTable_TypeInSens(dev_list_EtoV, dev_list_VtoE, 'dev')
    ScoreTable.createScoreTable_TypeSens(dev_list_EtoV, dev_list_VtoE, 'dev')

    list_lambda = init_lambda()
    # Snapshot the starting weights instead of aliasing them: if
    # update_list_lambda modifies its argument in place, an aliased
    # best_lambda would silently change along with it.
    best_lambda = None if list_lambda is None else dict(list_lambda)
    best_res = init_result()

    while list_lambda is not None:
        print('List Lambda ', list_lambda)
        cur_res = train_dev(list_lambda)
        if better_than(cur_res, best_res):
            best_lambda = dict(list_lambda)
            best_res = cur_res
        list_lambda = update_list_lambda(list_lambda, lambda_step)

    print('BestRes ', best_res)
    print('BestLambda ', best_lambda)
    config.WriteBestLambda_TypeSens(best_lambda)
Esempio n. 5
0
def main():
    '''Tune the lambda weights on the dev data and persist the best set.

    Builds the type-insensitive score table and the training candidate set,
    then repeatedly scores the current weights with ``train_dev`` and advances
    them with ``update_list_lambda`` (step 0.1) until that helper returns
    ``None``. The best weights according to ``better_than`` are printed and
    handed to ``config.WriteBestLambda``.
    '''
    ScoreTable.createScoreTable_TypeInSens(dev_list_EtoV, dev_list_VtoE)
    CandidateSet.createCandidateSetForTraining(dev_list_EtoV, dev_list_VtoE)
    print('Created Candidate Set')

    # The same value is passed to both init_lambda and update_list_lambda;
    # binding it once keeps the two calls in agreement. Its meaning is defined
    # by those helpers (presumably the number of weights -- TODO confirm).
    n_lambda = 4
    step = 0.1

    list_lambda = init_lambda(n_lambda)
    # Snapshot the starting weights instead of aliasing them: if
    # update_list_lambda modifies its argument in place, an aliased
    # best_lambda would silently change along with it.
    best_lambda = None if list_lambda is None else dict(list_lambda)
    best_res = init_result()

    while list_lambda is not None:
        print('List Lambda ', list_lambda)
        cur_res = train_dev(list_lambda)
        if better_than(cur_res, best_res):
            best_lambda = dict(list_lambda)
            best_res = cur_res
        list_lambda = update_list_lambda(list_lambda, step, n_lambda)

    print('BestRes ', best_res)
    print('BestLambda ', best_lambda)
    config.WriteBestLambda(best_lambda)
Esempio n. 6
0
def getCombineScoreCandidate(cur_candidate,EtoV_sent,VtoE_sent,weight_dict,sent_index,candidate_index,train_mode = False):
    '''
    Return the combined score of one candidate.

    The per-feature scores are either looked up in the score table by
    (sent_index, candidate_index) when train_mode is true, or computed with
    getScoreDict_TypeInSens from the candidate and the two sentences
    otherwise. They are then combined with weight_dict through getDictDot.
    '''
    if not train_mode:
        feature_scores = getScoreDict_TypeInSens(cur_candidate, EtoV_sent, VtoE_sent)
    else:
        feature_scores = ScoreTable.getScoreforOneCandidate_TypeInSens(sent_index, candidate_index)
    return getDictDot(feature_scores, weight_dict)
Esempio n. 7
0
def getBestLambda(lambda_list_to_update_tmp):
    '''Search the lambda weights on the dev split and return the best set.

    Builds the dev candidate set and both dev score tables, then repeatedly
    scores the current weights with ``train_dev`` and advances them with
    ``update_list_lambda`` (using the module-level ``lambda_step``) until that
    helper returns ``None``. The weights whose result is judged best by
    ``better_than`` are kept.

    Args:
        lambda_list_to_update_tmp: Stored in the module-level global
            ``lambda_list_to_update`` before the search starts.

    Returns:
        A copy of the best weight mapping found, or ``None`` if
        ``init_lambda()`` returned ``None``.
    '''
    global lambda_list_to_update
    lambda_list_to_update = lambda_list_to_update_tmp

    CandidateSet.createCandidateSet(dev_list_EtoV, dev_list_VtoE, 'dev')
    ScoreTable.createScoreTable_TypeInSens(dev_list_EtoV, dev_list_VtoE, 'dev')
    ScoreTable.createScoreTable_TypeSens(dev_list_EtoV, dev_list_VtoE, 'dev')

    list_lambda = init_lambda()
    # Snapshot the starting weights instead of aliasing them: if
    # update_list_lambda modifies its argument in place, an aliased
    # best_lambda would silently change along with it.
    best_lambda = None if list_lambda is None else dict(list_lambda)
    best_res = init_result()

    while list_lambda is not None:
        print('List Lambda ', list_lambda)
        cur_res = train_dev(list_lambda)
        if better_than(cur_res, best_res):
            best_lambda = dict(list_lambda)
            best_res = cur_res
        list_lambda = update_list_lambda(list_lambda, lambda_step)

    print('BestRes ', best_res)
    print('BestLambda ', best_lambda)
    return best_lambda
Esempio n. 8
0
def _print_mismatch_report(true_pair, pair, mode):
    '''
    Print debugging details for a prediction whose second span differs from
    the true pair: every stored candidate with its score, the true and
    predicted pairs, the predicted pair's score, and the Spacy / Stanford
    entity lists read back for the same sentence.
    '''
    # pair[-2] is used as the sentence index and pair[-1] as the candidate
    # index in the score-table lookups below.
    sent_index = pair[-2]
    candidates = CandidateSet.getCandidateSetFromFile(sent_index, mode)
    for cand_index, cand in enumerate(candidates):
        print('#####################')
        print('Candidate', cand)
        print('Score',
              ScoreTable.getScoreforOneCandidate_TypeInSens(sent_index,
                                                            cand_index))

    print('--------------')
    print('True', true_pair)
    print('Predict', pair)
    print('Score',
          ScoreTable.getScoreforOneCandidate_TypeInSens(sent_index, pair[-1]))
    print('Spacy Vi', VtoE_model.getEntList_Spacy_FromFile(sent_index))
    print('Spacy En', EtoV_model.getEntList_Spacy_FromFile(sent_index))
    print('Stanford En',
          EtoV_model.getEntList_StanfordNER_FromFile(sent_index))
    print('Stanford Vi',
          VtoE_model.getEntList_StanfordNER_FromFile(sent_index))


def evaluateSentencePair(predict, true_set, mode):
    '''
    Compare the predicted set and the true set of one sentence pair.

    A prediction counts as a true positive when the first and last items of
    its element 0 (the English side) and of its element 1 (the Vietnamese
    side) all equal those of a true pair. A prediction that matches on
    element 0 but not on element 1 triggers a diagnostic dump to stdout.

    Returns the number of true positives.
    '''
    print('==================')
    matched = 0
    ranked = sorted(utilities.make_unique(predict), key=lambda item: item[0])

    for true_pair in true_set:
        en_begin = true_pair[0][0]
        en_end = true_pair[0][-1]

        # Predictions with a non-empty element 0 that starts where the true
        # pair starts; collected fully before any of them is inspected.
        same_start = [
            cand for cand in ranked
            if len(cand[0]) >= 1 and cand[0][0] == en_begin
        ]

        for pair in same_start:
            if pair[0][-1] == en_end and len(pair[1]) >= 1:
                pred_vi_begin = pair[1][0]
                pred_vi_end = pair[1][-1]
                gold_vi_begin = true_pair[1][0]
                gold_vi_end = true_pair[1][-1]
                if gold_vi_begin == pred_vi_begin and gold_vi_end == pred_vi_end:
                    matched += 1
                else:
                    _print_mismatch_report(true_pair, pair, mode)

    return matched
    parser.add_argument("--starting_temperature", default=1000, type=float, help="default=1.0")
    parser.add_argument("--cooling_rate", default=0.1, type=float, help="default=0.1")
    parser.add_argument("--steps_per_temp", default=1000, type=int, help="default=1000")
    parser.add_argument("--quench", action="store_true", default=False, help="default=False")

    parser.add_argument("--initialization", default="random", choices=["random", "lowdens", "lowrmsd" ], type=str, help="")

    # multi thread
    parser.add_argument("--multiprocessor", action="store_true", default=False, help="")
    parser.add_argument("--nprocs", type=int, help="numbers of CPUs you are going to use, default=20")
    args = parser.parse_args()
    print print_args( args )

    wts = Weights( args.density_score_wt, args.overlap_score_wt, args.closab_score_wt, args.clash_score_wt )

    scoretable = ScoreTable( args.selected_frags_path )
    scoretable.score_files_reader( args.density_scorefile, args.overlap_scorefile, args.nonoverlap_scorefile )

    scorefxn = ScoreFunction( scoretable, wts, args.null_frag_score )

    if args.multiprocessor:
        if not args.nprocs: n_procs = processors_availabilty( total_processors() )
        else: n_procs = args.nprocs

        to_pass = [ ( scorefxn, args, runid ) for runid in range( args.nstruct ) ]
        myPool = Pool( processes = n_procs )
        myResults = myPool.map_async( run_annealer, to_pass )
        myResults.get()

    else:
        for run_id in range( args.nstruct ):