import os

def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # run BEER to get the sentence-level scores for s1 and s2
    beer1 = "./code/beer_2.0/beer -s " + opt.s1 + " -r " + opt.ref + " --printSentScores > ./tmp/tmp1"
    beer2 = "./code/beer_2.0/beer -s " + opt.s2 + " -r " + opt.ref + " --printSentScores > ./tmp/tmp2"
    print(beer1)
    print(beer2)
    os.system(beer1)
    os.system(beer2)
    # extract the scores from the files; the last line of each tmp file holds
    # the corpus-level score, so drop it
    tmp_sc1 = [float(li.strip('\n').split(' ')[-1]) for li in open('./tmp/tmp1')]
    tmp_sc1.pop(-1)
    tmp_sc2 = [float(li.strip('\n').split(' ')[-1]) for li in open('./tmp/tmp2')]
    tmp_sc2.pop(-1)
    # transform the DA scores into relative-ranking (RR) decisions
    darr = daToRr(tmp_sc1, tmp_sc2, float(opt.threshold))
    writefile(darr, './tmp/darr')
    # read the human scores and compute the tau-like correlation
    rr = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taur = valTauLike(rr, darr)
    writefile(taur, './tmp/taur', 'a')
    print(taur)
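# The script above relies on three helpers defined elsewhere in this repo:
# daToRr, valTauLike and writefile. The sketches below are assumptions
# inferred from the call sites, not the original implementations. valTauLike
# is assumed to follow the WMT "Kendall's tau-like" formulation (concordant
# minus discordant decisions over their total, skipping tied human
# judgments), and daToRr is assumed to threshold the DA score difference
# into {-1, 0, 1}.

def daToRr(sc1, sc2, threshold):
    # hypothetical: map two lists of DA scores to relative-ranking labels
    out = []
    for a, b in zip(sc1, sc2):
        if a - b > threshold:
            out.append(1)
        elif b - a > threshold:
            out.append(-1)
        else:
            out.append(0)
    return out

def valTauLike(human, metric):
    # hypothetical: (concordant - discordant) / (concordant + discordant),
    # counting only pairs where the human judgment is not a tie
    conc = disc = 0
    for h, m in zip(human, metric):
        if h == 0:
            continue  # skip tied human judgments
        if h == m:
            conc += 1
        else:
            disc += 1
    return (conc - disc) * 1.0 / (conc + disc)

def writefile(data, path, mode='w'):
    # hypothetical: dump a value or a list of values, one item per line
    with open(path, mode) as f:
        if isinstance(data, list):
            f.write('\n'.join(str(d) for d in data) + '\n')
        else:
            f.write(str(data) + '\n')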
def _calcu_fitness(self):
    # fitness of one individual: build a weighted mixture of the per-layer
    # representations selected by the active genes, then score the mixture
    # against the human relative rankings (paths are specific to this setup)
    scores = '/Users/ihuangyiran/Documents/Workplace_Python/data/MasterArbeit/plan_c_source_rank_de.en/extracted_data_2016/data_scores'
    tgt = [int(li.rstrip('\n')) for li in open(scores)]
    root = '/tmp/decMixture_2016_'
    bs, nd = np.load(root + 's1_0.npy').shape
    s1 = np.zeros([bs, nd])
    s2 = np.zeros([bs, nd])
    ref = np.zeros([bs, nd])
    values = [
        self.genes[i].get_value() for i in range(self.num_genes)
        if self.genes[i].get_key()
    ]
    s = sum(values)
    for i in range(self.num_genes):
        if self.genes[i].get_key():
            ratio = self.genes[i].get_value()
            s1 += (ratio * 1. / s) * np.load(root + 's1_' + str(i) + '.npy')
            s2 += (ratio * 1. / s) * np.load(root + 's2_' + str(i) + '.npy')
            ref += (ratio * 1. / s) * np.load(root + 'ref_' + str(i) + '.npy')
    # L1 distance of each system representation to the reference, then a
    # per-sentence comparison of the two distances
    d1 = [np.linalg.norm(l1 - l2, ord=1) for l1, l2 in zip(s1, ref)]
    d2 = [np.linalg.norm(l1 - l2, ord=1) for l1, l2 in zip(s2, ref)]
    c = [self._compare(l1, l2) for l1, l2 in zip(d1, d2)]
    taul = valTauLike(tgt, c)
    #print(taul)
    return taul
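# _compare is not shown here; the standalone scripts in this repo define the
# same helper inline, so the method is assumed to be the tri-valued
# comparison below (an inference from those call sites, not the original):

def _compare(self, a, b):
    # hypothetical: 1 if a is larger, -1 if b is larger, 0 for a tie
    if a > b:
        return 1
    elif a < b:
        return -1
    else:
        return 0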
import os

def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # run get_embeddings to extract the word embeddings for the inputs
    comm_ref = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.ref + " -output /tmp/data_ref"
    comm_s1 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.s1 + " -output /tmp/data_s1"
    comm_s2 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.s2 + " -output /tmp/data_s2"
    if opt.type == 'decoder_hidden':
        # decoder states need the source text as -src and the reference/hypothesis as -tgt
        comm_ref = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.src + " -output /tmp/data_ref -tgt " + opt.ref
        comm_s1 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.src + " -output /tmp/data_s1 -tgt " + opt.s1
        comm_s2 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.src + " -output /tmp/data_s2 -tgt " + opt.s2
    print(comm_ref)
    print(comm_s1)
    print(comm_s2)
    os.system(comm_ref)
    os.system(comm_s1)
    os.system(comm_s2)
    # run test3.py to turn the word embeddings into sentence scores
    print(">>> start getting the scores")
    chrF1 = "python code/NNMetric/test3.py -hyp /tmp/data_s1 -ref /tmp/data_ref -join " + opt.mode + " > ./tmp/tmp1"
    chrF2 = "python code/NNMetric/test3.py -hyp /tmp/data_s2 -ref /tmp/data_ref -join " + opt.mode + " > ./tmp/tmp2"
    # chrF1= "python code/NNMetric/test.py -hyp " + opt.s1 + " -ref " + opt.ref + " -w2v data/word2vec/GoogleNews-vectors-negative300-SLIM.bin.gz -join max > ./tmp/tmp1"
    # chrF2= "python code/NNMetric/test.py -hyp " + opt.s2 + " -ref " + opt.ref + " -w2v data/word2vec/GoogleNews-vectors-negative300-SLIM.bin.gz -join max > ./tmp/tmp2"
    print(chrF1)
    print(chrF2)
    os.system(chrF1)
    os.system(chrF2)
    print("<<< finished getting the scores and stored them in the tmp files")
    # extract the scores from the files
    print(">>> read the scores from above and compare the results")
    tmp_sc1 = [float(li.rstrip('\n')[1:-1].split(' ')[-1]) for li in open('./tmp/tmp1')]
    tmp_sc2 = [float(li.rstrip('\n')[1:-1].split(' ')[-1]) for li in open('./tmp/tmp2')]
    # clean up the embedding files
    os.system('rm /tmp/data_ref')
    os.system('rm /tmp/data_s1')
    os.system('rm /tmp/data_s2')
    assert len(tmp_sc1) == len(tmp_sc2)

    def _compare(a, b):
        if a > b:
            return 1
        elif a < b:
            return -1
        else:
            return 0

    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1, sc2) for sc1, sc2 in zip_sc]
    print("<<< finished comparing")
    # read the target data and calculate the tau-like correlation
    print(">>> read target data and calculate the tau-like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    print("<<< finished.")
    print(taul)
def evaluate_tau_like(arr1, arr2):
    """
    arr1 comes from the model
    arr2 comes from the target file
    """
    a1 = arr1.cpu()
    a2 = arr2.cpu()
    a1 = a1.data.numpy()
    a2 = a2.data.numpy()
    a1 = list(map(result_transform_sf_to_score, a1))
    a2 = a2 - 1  # shift the target labels into the same range as the predictions
    taul = valTauLike(a2, a1)  # the target side goes first
    return taul
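# result_transform_sf_to_score is defined elsewhere in the repo. Given that
# it is applied to rows of a 3-way softmax output while the targets are
# shifted by -1 to match, it is assumed to be an argmax mapping a
# probability triple to a class label; this sketch is an inference, not the
# original implementation:

import numpy as np

def result_transform_sf_to_score(row):
    # hypothetical: pick the most probable of the three ranking classes
    return int(np.argmax(row))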
import os

def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # tokenize both system outputs and the reference, then run the Moses
    # sentence-bleu tool to get a sentence-level BLEU score per segment
    print(">>> start getting the sentence-BLEU scores")
    comm1 = 'cat ' + opt.s1 + ' |$MOSESROOT/scripts/tokenizer/tokenizer.perl > /tmp/tokenized_s1'
    comm2 = 'cat ' + opt.s2 + ' |$MOSESROOT/scripts/tokenizer/tokenizer.perl > /tmp/tokenized_s2'
    comm3 = 'cat ' + opt.ref + ' |$MOSESROOT/scripts/tokenizer/tokenizer.perl > /tmp/tokenized_ref'
    print(comm1)
    print(comm2)
    print(comm3)
    os.system(comm1)
    os.system(comm2)
    os.system(comm3)
    comm4 = 'cat /tmp/tokenized_s1 |$MOSESROOT/mert/sentence-bleu /tmp/tokenized_ref > ./tmp/tmp1'
    comm5 = 'cat /tmp/tokenized_s2 |$MOSESROOT/mert/sentence-bleu /tmp/tokenized_ref > ./tmp/tmp2'
    print(comm4)
    print(comm5)
    os.system(comm4)
    os.system(comm5)
    print("<<< finished getting the sentence-BLEU scores and stored them in the tmp files")
    # extract the scores from the files
    print(">>> read the scores from above and compare the results")
    tmp_sc1 = [float(li.rstrip('\n').split('\t')[-1]) for li in open('./tmp/tmp1')]
    tmp_sc2 = [float(li.rstrip('\n').split('\t')[-1]) for li in open('./tmp/tmp2')]
    # compare the scores of the two systems
    assert len(tmp_sc1) == len(tmp_sc2)

    def _compare(a, b):
        if a > b:
            return 1
        elif a < b:
            return -1
        else:
            return 0

    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1, sc2) for sc1, sc2 in zip_sc]
    print("<<< finished comparing")
    # read the target data and calculate the tau-like correlation
    print(">>> read target data and calculate the tau-like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    print("<<< finished.")
    print(taul)
import os

def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # run the NN metric (test2.py) to score each system against the reference
    #print(">>> start getting the scores")
    chrF1 = "python code/NNMetric/test2.py -hyp " + opt.s1 + " -ref " + opt.ref + " -type " + opt.type + " > ./tmp/tmp1"
    chrF2 = "python code/NNMetric/test2.py -hyp " + opt.s2 + " -ref " + opt.ref + " -type " + opt.type + " > ./tmp/tmp2"
    # chrF1= "python code/NNMetric/test.py -hyp " + opt.s1 + " -ref " + opt.ref + " -w2v data/word2vec/GoogleNews-vectors-negative300-SLIM.bin.gz -join max > ./tmp/tmp1"
    # chrF2= "python code/NNMetric/test.py -hyp " + opt.s2 + " -ref " + opt.ref + " -w2v data/word2vec/GoogleNews-vectors-negative300-SLIM.bin.gz -join max > ./tmp/tmp2"
    #print(chrF1)
    #print(chrF2)
    os.system(chrF1)
    os.system(chrF2)
    #print("<<< finished getting the scores and stored them in the tmp files")
    # extract the scores from the files (the line-dropping below is disabled)
    #print(">>> read the scores from above and compare the results")
    tmp_sc1 = [float(li.rstrip('\n')[1:-1].split(' ')[-1]) for li in open('./tmp/tmp1')]
    tmp_sc2 = [float(li.rstrip('\n')[1:-1].split(' ')[-1]) for li in open('./tmp/tmp2')]
    #tmp_sc1.pop(0)
    #tmp_sc2.pop(0)
    #for i in range(3):
    #    tmp_sc1.pop(-1)
    #    tmp_sc2.pop(-1)
    # compare the scores of the two systems
    assert len(tmp_sc1) == len(tmp_sc2)

    def _compare(a, b):
        if a > b:
            return 1
        elif a < b:
            return -1
        else:
            return 0

    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1, sc2) for sc1, sc2 in zip_sc]
    #print("<<< finished comparing")
    # read the target data and calculate the tau-like correlation
    #print(">>> read target data and calculate the tau-like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    #print("<<< finished.")
    print(opt.ref + '_' + opt.type, taul)
def evaluate_tau_like(model, src, tgt):
    """
    arr1 is the output of the model and arr2 is the target,
    so arr1 goes in the second argument position.
    """
    arr1 = predict(model, src)
    arr1 = arr1.numpy()
    arr2 = tgt.numpy()
    if arr1.shape[1] == 3:  # softmax output over three ranking classes
        arr1 = list(map(result_transform_sf_to_score, arr1))
        arr2 = arr2 - 1
    else:  # scalar regression output
        arr1 = list(map(lambda x: round(x), arr1))
    taul = valTauLike(arr2, arr1)
    return taul
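# predict is defined elsewhere in the repo; from its use here (it returns a
# tensor that .numpy() is called on) it is assumed to be a forward pass with
# gradients disabled, roughly like this sketch:

import torch

def predict(model, src):
    # hypothetical: run the model in eval mode and return a detached output
    model.eval()
    with torch.no_grad():
        out = model(src)
    return out.cpu()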
import os

def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # run chrF++ to get a sentence-level score for each system
    print(">>> start getting the chrF scores")
    chrF1 = "python ./code/chrF/chrF++.py -H " + opt.s1 + " -R " + opt.ref + " -nw 0 -b 3 -s > ./tmp/tmp1"
    chrF2 = "python ./code/chrF/chrF++.py -H " + opt.s2 + " -R " + opt.ref + " -nw 0 -b 3 -s > ./tmp/tmp2"
    print(chrF1)
    print(chrF2)
    os.system(chrF1)
    os.system(chrF2)
    print("<<< finished getting the chrF scores and stored them in the tmp files")
    # extract the scores from the files; drop the first line and the last
    # three lines, which contain the corpus-level summary
    print(">>> read the scores from above and compare the results")
    tmp_sc1 = [float(li.rstrip('\n').split('\t')[-1]) for li in open('./tmp/tmp1')]
    tmp_sc2 = [float(li.rstrip('\n').split('\t')[-1]) for li in open('./tmp/tmp2')]
    tmp_sc1.pop(0)
    tmp_sc2.pop(0)
    for i in range(3):
        tmp_sc1.pop(-1)
        tmp_sc2.pop(-1)
    # compare the scores of the two systems
    assert len(tmp_sc1) == len(tmp_sc2)

    def _compare(a, b):
        if a > b:
            return 1
        elif a < b:
            return -1
        else:
            return 0

    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1, sc2) for sc1, sc2 in zip_sc]
    print("<<< finished comparing")
    # read the target data and calculate the tau-like correlation
    print(">>> read target data and calculate the tau-like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    print("<<< finished.")
    print(taul)
def evaluate_tau_like(model, src, tgt):
    arr1 = predict(model, src)
    arr1 = arr1.numpy()
    arr2 = tgt.numpy()
    if arr1.shape[1] == 3:  # softmax output over three ranking classes
        arr1 = list(map(result_transform_sf_to_score, arr1))
        arr2 = arr2 - 1
    else:
        arr1 = list(map(lambda x: round(x), arr1))
    taul = valTauLike(arr2, arr1)  # target side first, as in the other variants
    return taul


def evaluate_corr(model, src, tgt):
    # assumed function name: this block computes a Spearman correlation and
    # matches the evaluate_corr(model, src, tgt) calls elsewhere in the repo;
    # numpy, stats (scipy.stats) and opt are assumed to exist at module level
    arr1 = predict(model, src)
    arr1 = arr1.numpy()
    arr2 = tgt.numpy()
    if opt.rank:
        if arr1.shape[1] == 3:  # softmax output
            arr1 = numpy.array(list(map(result_transform_sf_to_score, arr1)))
            arr2 = arr2 - 1
        else:
            arr1 = numpy.array(list(map(lambda x: round(x), arr1)))
    corr = stats.spearmanr(arr1, arr2)[0]
    return corr
def test_da_model_with_rr_data(opt, model, data):
    """
    test a DA-trained model with RR data: use s1+ref and s2+ref to get the
    two scores separately, then compare them to get the darr result
    input:
        model: a DA model
        src: DA data
        tgt: RR data
    """
    # reload the data to do the test
    tgt = "../data/MasterArbeit/plan_c_rank_de.en/train_result"
    src_sys = "../data/MasterArbeit/plan_c_rank_de.en/train_s1_hidden"
    src_sys2 = "../data/MasterArbeit/plan_c_rank_de.en/train_s2_hidden"
    src_ref = "../data/MasterArbeit/plan_c_rank_de.en/train_ref_hidden"
    tgt_val = "../data/MasterArbeit/plan_c_rank_de.en/train_result"
    src_val_sys = "../data/MasterArbeit/plan_c_rank_de.en/train_s1_hidden"
    src_val_sys2 = "../data/MasterArbeit/plan_c_rank_de.en/train_s2_hidden"
    src_val_ref = "../data/MasterArbeit/plan_c_rank_de.en/train_ref_hidden"
    tgt_test = "../data/MasterArbeit/plan_c_rank_de.en/test_result"
    src_test_sys = "../data/MasterArbeit/plan_c_rank_de.en/test_s1_hidden"
    src_test_sys2 = "../data/MasterArbeit/plan_c_rank_de.en/test_s2_hidden"
    src_test_ref = "../data/MasterArbeit/plan_c_rank_de.en/test_ref_hidden"
    rank = True
    sf_output = False
    data.reload_data(src_sys, src_sys2, src_ref, tgt, src_val_sys,
                     src_val_sys2, src_val_ref, tgt_val, src_test_sys,
                     src_test_sys2, src_test_ref, tgt_test, rank, sf_output)
    # compute the scores
    num_train, num_val, num_test = data.get_nu_batch()
    if not rank:
        # dead branch while rank is hard-coded to True above; file_test is
        # assumed to be an open log file from the enclosing module
        out_corr = 0
        for i in range(num_test):
            src, tgt = data.get_test_batch()
            if opt.cuda == "True":
                tgt = tgt.cuda()
            corr = evaluate_corr(model, src, tgt)
            out_corr = out_corr + corr
            tmp_corr = "%d,%f" % (i, corr)
            file_test.write(tmp_corr)
            file_test.write('\n')
    else:
        src = []
        tgt = []
        for i in range(num_test):
            tmp_src, tmp_tgt = data.get_test_batch()
            src.append(tmp_src)
            tgt.append(tmp_tgt)
        src = torch.cat(src)
        tgt = torch.cat(tgt)
        print(src.shape, tgt.shape)
        # each example is [ref; s1; s2] with 500 dims per part: score s1+ref
        # and s2+ref separately, then compare the two scores
        ref, s1, s2 = src.split(500, 1)
        src1 = torch.cat((s1, ref), 1)
        src2 = torch.cat((s2, ref), 1)
        o1 = predict(model, src1)
        o2 = predict(model, src2)
        print(o1.shape, o2.shape)
        o1 = o1.squeeze().numpy().tolist()
        o2 = o2.squeeze().numpy().tolist()
        rr = daToRr(o1, o2, 0)
        taul = valTauLike(tgt, rr)
        return taul
import os
import re

def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # get the language pair and the year from the file name
    get_date = re.compile(r'2\d+')
    get_lan = re.compile(r'[a-z]{2}\.[a-z]{2}')
    date = get_date.search(opt.s1).group(0)
    lanp = get_lan.search(opt.s1).group(0)
    # set tmp names for the get_embeddings output
    o_ref = '/tmp/' + lanp + date + opt.type + '.ref'
    o_s1 = '/tmp/' + lanp + date + opt.type + '.s1'
    o_s2 = '/tmp/' + lanp + date + opt.type + '.s2'
    # run get_embeddings to extract the word embeddings for the input,
    # skipping the extraction if cached files already exist
    if (not os.path.exists(o_ref) or not os.path.exists(o_s1)
            or not os.path.exists(o_s2)):
        comm_ref = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.ref + " -output " + o_ref
        comm_s1 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.s1 + " -output " + o_s1
        comm_s2 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.s2 + " -output " + o_s2
        if opt.type == 'decoder_hidden' or opt.type == 'decoder_hidden_last':
            # decoder states need the source text as -src and the reference/hypothesis as -tgt
            comm_ref = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.src + " -output " + o_ref + " -tgt " + opt.ref
            comm_s1 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.src + " -output " + o_s1 + " -tgt " + opt.s1
            comm_s2 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.src + " -output " + o_s2 + " -tgt " + opt.s2
        print(comm_ref)
        print(comm_s1)
        print(comm_s2)
        os.system(comm_ref)
        os.system(comm_s1)
        os.system(comm_s2)
    # run test6.py to turn the word embeddings into sentence scores
    print(">>> start getting the scores")
    chrF1 = "python code/NNMetric/test6.py -hyp " + o_s1 + " -ref " + o_ref + " > /tmp/tmp1"
    chrF2 = "python code/NNMetric/test6.py -hyp " + o_s2 + " -ref " + o_ref + " > /tmp/tmp2"
    print(chrF1)
    print(chrF2)
    os.system(chrF1)
    os.system(chrF2)
    print("<<< finished getting the scores and stored them in the tmp files")
    print(">>> read the scores from above and compare the results")
    tmp_sc1 = [float(li.rstrip('\n')) for li in open('/tmp/tmp1')]
    tmp_sc2 = [float(li.rstrip('\n')) for li in open('/tmp/tmp2')]
    assert len(tmp_sc1) == len(tmp_sc2)

    def _compare(a, b):
        if a > b:
            return 1
        elif a < b:
            return -1
        else:
            return 0

    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1, sc2) for sc1, sc2 in zip_sc]
    print("<<< finished comparing")
    # read the target data and calculate the tau-like correlation
    print(">>> read target data and calculate the tau-like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    print("<<< finished.")
    print(taul)
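# Every main() above reads its arguments from a module-level parser that is
# not shown. A minimal sketch of what it might look like: the option names
# are inferred from the opt.* accesses in the scripts, while the help
# strings and the lack of defaults are assumptions.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-s1', help='output file of system 1')
parser.add_argument('-s2', help='output file of system 2')
parser.add_argument('-ref', help='reference translation file')
parser.add_argument('-src', help='source file (needed for decoder states)')
parser.add_argument('-scores', help='file with the human relative-ranking judgments')
parser.add_argument('-model', help='path to the trained NMT model')
parser.add_argument('-type', help='embedding type, e.g. decoder_hidden')
parser.add_argument('-mode', help='how to join the word embeddings')
parser.add_argument('-threshold', help='DA score difference that counts as a tie')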