def run_test(inferrer):
    """Run every test context through *inferrer* and write result files.

    Reads one context line per instance from ``args.testfile`` and writes:
      - ``args.resultsfile``                   : full trace/debug output
      - ``args.resultsfile + '.ranked'``       : RANKED lines (candidate ranking)
      - ``args.resultsfile + '.generated.oot'``: top-k generated substitutes
      - ``args.resultsfile + '.generated.best'``: single best generated substitute

    Relies on the module-level ``args`` namespace for all configuration.
    """
    if args.candidatesfile is not None:
        target2candidates = read_candidates(args.candidatesfile)
    else:
        # NOTE(review): the filter step below indexes target2candidates, so a
        # None here would raise TypeError — presumably a candidates file is
        # always supplied when ranking output is wanted; confirm with callers.
        target2candidates = None

    # 'with' guarantees all five files are closed (and flushed) even if an
    # inference step raises mid-run — the original leaked them on error.
    with open(args.testfile, 'r') as tfi, \
         open(args.resultsfile, 'w') as tfo, \
         open(args.resultsfile + '.ranked', 'w') as tfo_ranked, \
         open(args.resultsfile + '.generated.oot', 'w') as tfo_generated_oot, \
         open(args.resultsfile + '.generated.best', 'w') as tfo_generated_best:
        lines = 0
        while True:
            context_line = tfi.readline()
            if not context_line:
                break
            lst_instance = ContextInstance(context_line, args.no_pos)
            lines += 1
            if args.debug:
                tfo.write("\nTest context:\n")
                tfo.write("***************\n")
                tfo.write(lst_instance.decorate_context())

            result_vec = inferrer.find_inferred(lst_instance, tfo)

            # Generation subtask: lemmatized substitutes from the full vector.
            generated_results = inferrer.generate_inferred(
                result_vec, lst_instance.target,
                lst_instance.target_lemma, lst_instance.pos)
            instance_key = ' '.join(
                [lst_instance.full_target_key, lst_instance.target_id])
            oot_str = vec_to_str_generated(
                generated_results.items(), args.topgenerated)
            tfo.write("\nGenerated lemmatized results\n")
            tfo.write("***************\n")
            tfo.write("GENERATED\t" + instance_key + " ::: " + oot_str + "\n")
            tfo_generated_oot.write(instance_key + " ::: " + oot_str + "\n")
            tfo_generated_best.write(
                instance_key + " :: "
                + vec_to_str_generated(generated_results.items(), 1) + "\n")

            # Ranking subtask: restrict to the gold candidates for this target.
            filtered_results = inferrer.filter_inferred(
                result_vec, target2candidates[lst_instance.target_key],
                lst_instance.pos)
            ranked_line = ("RANKED\t" + instance_key + "\t"
                           + vec_to_str(filtered_results.items(),
                                        len(filtered_results)) + "\n")
            tfo.write("\nFiltered results\n")
            tfo.write("***************\n")
            tfo.write(ranked_line)
            tfo_ranked.write(ranked_line)

            if lines % 10 == 0:
                print("Read %d lines" % lines)

    print("Read %d lines in total" % lines)
    print("Time per word: %f msec" % inferrer.msec_per_word())
def run_test(inferrer):
    """Run every test context through *inferrer* and write result files.

    Reads one context line per instance from ``args.testfile`` and writes:
      - ``args.resultsfile``                   : full trace/debug output
      - ``args.resultsfile + '.ranked'``       : RANKED lines (candidate ranking)
      - ``args.resultsfile + '.generated.oot'``: top-k generated substitutes
      - ``args.resultsfile + '.generated.best'``: single best generated substitute

    Relies on the module-level ``args`` namespace for all configuration.
    """
    if args.candidatesfile is not None:
        target2candidates = read_candidates(args.candidatesfile)
    else:
        # NOTE(review): the filter step below indexes target2candidates, so a
        # None here would raise TypeError — presumably a candidates file is
        # always supplied when ranking output is wanted; confirm with callers.
        target2candidates = None

    # 'with' guarantees all five files are closed (and flushed) even if an
    # inference step raises mid-run — the original leaked them on error.
    with open(args.testfile, 'r') as tfi, \
         open(args.resultsfile, 'w') as tfo, \
         open(args.resultsfile + '.ranked', 'w') as tfo_ranked, \
         open(args.resultsfile + '.generated.oot', 'w') as tfo_generated_oot, \
         open(args.resultsfile + '.generated.best', 'w') as tfo_generated_best:
        lines = 0
        for context_line in tfi:
            lst_instance = ContextInstance(context_line, args.no_pos)
            lines += 1
            if args.debug:
                tfo.write("\nTest context:\n")
                tfo.write("***************\n")
                tfo.write(lst_instance.decorate_context())

            result_vec = inferrer.find_inferred(lst_instance, tfo)

            # Generation subtask: lemmatized substitutes from the full vector.
            # (The iter(...) wrappers around .items() were 2to3 artifacts and
            # are dropped — dict views are already iterable.)
            generated_results = inferrer.generate_inferred(
                result_vec, lst_instance.target,
                lst_instance.target_lemma, lst_instance.pos)
            instance_key = ' '.join(
                [lst_instance.full_target_key, lst_instance.target_id])
            oot_str = vec_to_str_generated(
                generated_results.items(), args.topgenerated)
            tfo.write("\nGenerated lemmatized results\n")
            tfo.write("***************\n")
            tfo.write("GENERATED\t" + instance_key + " ::: " + oot_str + "\n")
            tfo_generated_oot.write(instance_key + " ::: " + oot_str + "\n")
            tfo_generated_best.write(
                instance_key + " :: "
                + vec_to_str_generated(generated_results.items(), 1) + "\n")

            # Ranking subtask: restrict to the gold candidates for this target.
            filtered_results = inferrer.filter_inferred(
                result_vec, target2candidates[lst_instance.target_key],
                lst_instance.pos)
            ranked_line = ("RANKED\t" + instance_key + "\t"
                           + vec_to_str(filtered_results.items(),
                                        len(filtered_results)) + "\n")
            tfo.write("\nFiltered results\n")
            tfo.write("***************\n")
            tfo.write(ranked_line)
            tfo_ranked.write(ranked_line)

            if lines % 10 == 0:
                print("Read %d lines" % lines)

    print("Read %d lines in total" % lines)
    print("Time per word: %f msec" % inferrer.msec_per_word())
def candidate_ranking_out(data_f, words2elmo_token, target_ws, sents,
                          position_lst, target_w2candidates, w2index, w2elmo,
                          vocab_all):
    """Score the vocabulary against each target-token embedding and write
    LST-style ``.ranked`` / ``.oot`` / ``.best`` result files.

    Output file names are derived from the basename of *data_f* plus the
    module-level ``model`` identifier.

    Args (assumptions marked for review):
        data_f: path of the input data file (only its basename is used).
        words2elmo_token: contextual target-token embedding matrix, one row
            per test instance — presumably numpy, shape (n_instances, dim);
            TODO confirm.
        target_ws: per-instance "lemma.pos id" strings, index-aligned with
            words2elmo_token rows.
        sents: tokenized sentences; sents[i][position_lst[i]] is the surface
            target form for instance i.
        position_lst: target-word position within each sentence.
        target_w2candidates: gold candidate lists keyed by "lemma.pos".
        w2index: unused here; kept for interface compatibility.
        w2elmo: vocabulary embedding matrix, rows aligned with vocab_all.
        vocab_all: vocabulary words, index-aligned with w2elmo rows.
    """
    out_base = data_f.split('/')[-1] + '.' + model + 'vocab80000'

    print('normalizing vectors')
    # Unit-normalize both matrices so the single matrix product below yields
    # cosine similarities for every (instance, vocab word) pair at once.
    w2elmo = normalize(w2elmo)
    words2elmo_token = normalize(words2elmo_token)
    sim_matrix = w2elmo.dot(words2elmo_token.T).T
    print('normalizing completed')

    # 'with' closes (and flushes) all three output files even if scoring
    # raises mid-loop — the original leaked them on error.
    with open(out_base + '.ranked', 'w') as output_f_rank, \
         open(out_base + '.oot', 'w') as output_f_oot, \
         open(out_base + '.best', 'w') as output_f_best:
        for i in range(len(words2elmo_token)):
            if i % 100 == 0 and i >= 100:
                print(i)  # progress heartbeat

            target_w_out = target_ws[i]
            target_w = target_w_out.split()[0]
            pos = target_w.split('.')[-1]
            target_w_lemma = target_w.split('.')[0]

            # Map cosine similarity from [-1, 1] into a positive [0, 1] score,
            # then rank the whole vocabulary by it (descending).
            similarity = (sim_matrix[i] + 1.0) / 2
            result_vec = sorted(zip(vocab_all, similarity),
                                reverse=True, key=lambda x: x[1])

            try:
                candidates = target_w2candidates[
                    '.'.join(target_w.split('.')[:2])]
            except KeyError as e:
                # Instance has no gold candidate list: report and skip it
                # (best-effort, so one missing key doesn't abort the run).
                print('target w does not occur in gold candidates list: '
                      '{0}'.format(e))
                continue

            # Candidate-ranking subtask.
            filtered_results = filter_inferred(result_vec, candidates, pos)
            output_f_rank.write(
                "RANKED\t" + target_w_out + "\t"
                + vec_to_str(filtered_results.items(),
                             len(filtered_results)) + "\n")

            # Generation subtask: out-of-ten and single best substitutes.
            generated_results = generate_inferred(
                result_vec, sents[i][position_lst[i]], target_w_lemma, pos)
            output_f_oot.write(
                target_w_out + " ::: "
                + vec_to_str_generated(generated_results.items(), 10) + "\n")
            output_f_best.write(
                target_w_out + " :: "
                + vec_to_str_generated(generated_results.items(), 1) + "\n")