def main():
    """Smoke-test transliteration scoring on one hard-coded entity pair.

    Reads the EtoV and VtoE test alignment files, prints element 7 of
    ``CandidateSet.getCandidateSetFromFile(2)``, and then prints the value
    returned by ``Transliteration.getTransliterationProb`` for the sample
    pair.
    """
    # The two alignment lists are only consumed by the disabled
    # candidate-set rebuild below; the reads themselves are kept.
    etov_alignments = utilities.read_align_file(
        '../../../Alignment_Split/EtoV_Test.txt')
    vtoe_alignments = utilities.read_align_file(
        '../../../Alignment_Split/VtoE_Test.txt')
    # CandidateSet.createCandidateSet(etov_alignments, vtoe_alignments, 'test')

    sample_pair = (
        [10, 11],
        [12, 13, 14, 15],
        'PERSON',
        'Richard Matsch',
        'Thẩm phán Richard Matsch',
        'PERSON',
        2,
        1,
    )

    stored_candidate = CandidateSet.getCandidateSetFromFile(2)[7]
    print('Candidate', stored_candidate, 'test')

    print(Transliteration.getTransliterationProb(sample_pair, ''))
def main():
    """Build the entity tables and score entity pairs of the first dev sentence.

    Calls both entity-table builders, reads the EtoV dev alignment file,
    derives the target entity list for its first sentence, runs
    ``getAlignScore`` on every source/target entity pair, and finally
    prints ``alignment_table.shape``.
    """
    createEntListTable_Stanford()
    createEntListTable_Spacy()

    dev_alignments = utilities.read_align_file(
        '../../Alignment_Split/EtoV_Dev.txt')
    first_entry = dev_alignments[0]
    src_sentence = first_entry['Source']
    tgt_sentence = first_entry['Target']

    src_entities = getEntList_StanfordNER_FromFile(0)
    tgt_entities = getTargetEntList(src_sentence, tgt_sentence, src_entities)

    # The score is computed for each pair but not used afterwards.
    # Indexing the target list (rather than zipping) keeps the IndexError
    # when it is shorter than the source list.
    # NOTE(review): the loop is assumed to contain only this call -- confirm.
    for position, src_entity in enumerate(src_entities):
        getAlignScore(src_entity, tgt_entities[position], 0)

    # Materialise the three table columns; they are not read again here.
    vn_column = list(alignment_table.VN)
    en_column = list(alignment_table.EN)
    prob_column = list(alignment_table.Prob)

    print(alignment_table.shape)
def main():
    """Build the entity tables and fetch the entity set of the first VtoE dev sentence.

    Calls both entity-table builders, reads the VtoE dev alignment file and
    passes the first entry's 'Source' and 'Target' values, together with 0,
    to ``getEntSetFromFile``. The result is bound to ``pair`` and not used
    further.
    """
    # A disabled earlier variant of this routine built only the Stanford
    # table and computed one getAlignScore value for the first entity pair
    # (via getEntList_StanfordNER_FromFile / getTargetEntList) and printed it.
    createEntListTable_Stanford()
    createEntListTable_Spacy()

    dev_alignments = utilities.read_align_file(
        '../../Alignment_Split/VtoE_Dev.txt')
    first_entry = dev_alignments[0]

    pair = getEntSetFromFile(first_entry['Source'], first_entry['Target'], 0)
def main():
    """Run ``getNETranslationProb`` on one hard-coded named-entity pair.

    The pair is evaluated against the first entry of the VtoE dev alignment
    file. The result is bound to ``tmp`` and not printed or returned.
    """
    sample_pair = [
        [11, 12],
        [15, 16],
        "LOCATION",
        "El Nino",
        "El Nino",
        "LOCATION",
    ]

    dev_alignments = read_align_file('../../../Alignment_Split/VtoE_Dev.txt')
    first_sentence = dev_alignments[0]

    tmp = getNETranslationProb(sample_pair, first_sentence)
import evaluate_TypeSens import config import TrueSet import ScoreTable import CandidateSet lambda_list_to_update = ['mono_en','mono_vn','bi'] lambda_step = 0.1 cur_Count = 0 dev_file_EtoV = config.align_file_EtoV_dev dev_file_VtoE = config.align_file_VtoE_dev dev_file_en = config.en_file_dev dev_file_vn = config.vn_file_dev dev_list_EtoV = read_align_file(dev_file_EtoV) dev_list_VtoE = read_align_file(dev_file_VtoE) config_file = 'config.ini' trueSet = TrueSet.getFileTrueSet(dev_file_en,dev_file_vn) #MAIN def train_dev(list_lambda): """[summary] Arguments: list_lambda {[float]} -- [weight of features] output: Final Res of that list of lambda res =
''' from AlignmentModel import EtoV_model, VtoE_model import utilities import json import os.path import config candidate_set_test_file = config.candidate_set_test_file candidate_set_dev_file = config.candidate_set_dev_file Candidate_Set_Table_Dev = None Candidate_Set_Table_Test = None EtoV_Dev = utilities.read_align_file(config.align_file_EtoV_dev) VtoE_Dev = utilities.read_align_file(config.align_file_VtoE_dev) EtoV_Test = utilities.read_align_file(config.align_file_EtoV_test) VtoE_Test = utilities.read_align_file(config.align_file_VtoE_test) def createCandidateSet(EtoV_List, VtoE_List, mode): ''' Create CandidateSet file ''' EtoV_model.createEntListTable_Stanford(mode) VtoE_model.createEntListTable_Stanford(mode) EtoV_model.createEntListTable_Spacy(mode) VtoE_model.createEntListTable_Spacy(mode)
# Test-split inputs.
# NOTE(review): align_file_EtoV, used below, is presumably assigned just
# before this point -- confirm.
align_file_VtoE = '../../Alignment_Split/VtoE_Test.txt'
test_file_en = '../../Data/corpora/0_DATA/3_Test/test_eng'
test_file_vn = '../../Data/corpora/0_DATA/3_Test/test_viet'
config_file = 'config.ini'
outputfile = ''

# Feature weights exactly as returned by config.getWeight(); a pass that
# converted every value with float() (and a debug print of the values and
# their types) used to follow here and is disabled.
list_lambda = config.getWeight()

# Both alignment files are parsed once, when the module is loaded.
test_list_EtoV = read_align_file(align_file_EtoV)
test_list_VtoE = read_align_file(align_file_VtoE)


def main(lambda_list_to_update):
    """Rebuild the test candidate set and score tables, then print the lambdas.

    Arguments:
        lambda_list_to_update: passed unchanged to
            ``training_TypeInSens.getBestLambda``; it is also echoed to
            stdout first.

    Steps performed: print the argument, obtain ``list_lambda`` from
    ``getBestLambda``, create the candidate set and both score tables
    (TypeInSens and TypeSens) in 'test' mode, and print ``list_lambda``.
    """
    print(lambda_list_to_update)

    # This assignment creates a function-local list_lambda; the module-level
    # value loaded from config is not modified.
    list_lambda = training_TypeInSens.getBestLambda(lambda_list_to_update)

    CandidateSet.createCandidateSet(test_list_EtoV, test_list_VtoE, 'test')
    print("Create Candidate Set")

    ScoreTable.createScoreTable_TypeInSens(
        test_list_EtoV, test_list_VtoE, 'test')
    ScoreTable.createScoreTable_TypeSens(
        test_list_EtoV, test_list_VtoE, 'test')
    print("Created Score Table")

    print(list_lambda)