コード例 #1
0
ファイル: jcs_main.py プロジェクト: orenmel/lexsub
def run_test(inferrer):
    """Run ``inferrer`` on every context line of the test file and write results.

    Reads ``args.testfile`` one line at a time, wraps each line in a
    ``ContextInstance`` and writes:

    * ``args.resultsfile``                   -- full log (context, generated
      and, when available, ranked results),
    * ``args.resultsfile + '.ranked'``         -- one ``RANKED`` line per instance,
    * ``args.resultsfile + '.generated.oot'``  -- top ``args.topgenerated``
      generated substitutes per instance,
    * ``args.resultsfile + '.generated.best'`` -- the single best generated
      substitute per instance.

    The ranked output requires a candidates file.  When
    ``args.candidatesfile`` is None the ranking step is skipped (the
    ``.ranked`` file is still created, but stays empty) instead of failing
    with a ``TypeError`` on the first instance.

    :param inferrer: object providing ``find_inferred``, ``generate_inferred``,
        ``filter_inferred`` and ``msec_per_word``.
    """
    if args.candidatesfile is not None:
        target2candidates = read_candidates(args.candidatesfile)
    else:
        target2candidates = None

    lines = 0
    # Context managers guarantee all five handles are closed even when the
    # inferrer (or a malformed input line) raises part-way through the file.
    with open(args.testfile, 'r') as tfi, \
            open(args.resultsfile, 'w') as tfo, \
            open(args.resultsfile + '.ranked', 'w') as tfo_ranked, \
            open(args.resultsfile + '.generated.oot', 'w') as tfo_generated_oot, \
            open(args.resultsfile + '.generated.best', 'w') as tfo_generated_best:
        for context_line in tfi:
            lst_instance = ContextInstance(context_line, args.no_pos)
            lines += 1
            if args.debug:
                tfo.write("\nTest context:\n")
                tfo.write("***************\n")

            tfo.write(lst_instance.decorate_context())

            # find_inferred receives the log handle as well, so it may write
            # its own output into the main results file.
            result_vec = inferrer.find_inferred(lst_instance, tfo)

            generated_results = inferrer.generate_inferred(
                result_vec, lst_instance.target, lst_instance.target_lemma,
                lst_instance.pos)

            # Identifier shared by every output line for this instance.
            instance_key = ' '.join(
                [lst_instance.full_target_key, lst_instance.target_id])

            # Built once and reused for both the log and the .oot file.
            oot_line = (instance_key + " ::: " +
                        vec_to_str_generated(generated_results.iteritems(),
                                             args.topgenerated) + "\n")

            tfo.write("\nGenerated lemmatized results\n")
            tfo.write("***************\n")
            tfo.write("GENERATED\t" + oot_line)
            tfo_generated_oot.write(oot_line)
            tfo_generated_best.write(
                instance_key + " :: " +
                vec_to_str_generated(generated_results.iteritems(), 1) + "\n")

            # Ranking needs the gold candidate list; without a candidates
            # file there is nothing to filter against.
            if target2candidates is not None:
                filtered_results = inferrer.filter_inferred(
                    result_vec, target2candidates[lst_instance.target_key],
                    lst_instance.pos)

                ranked_line = (
                    "RANKED\t" + instance_key + "\t" +
                    vec_to_str(filtered_results.iteritems(),
                               len(filtered_results)) + "\n")

                tfo.write("\nFiltered results\n")
                tfo.write("***************\n")
                tfo.write(ranked_line)
                tfo_ranked.write(ranked_line)

            # Progress report every 10 instances.
            if lines % 10 == 0:
                print("Read %d lines" % lines)

    print("Read %d lines in total" % lines)
    print("Time per word: %f msec" % inferrer.msec_per_word())
コード例 #2
0
ファイル: jcs_main.py プロジェクト: mdcclv/lexsub
def run_test(inferrer):
    """Run ``inferrer`` on every context line of the test file and write results.

    Each line of ``args.testfile`` is parsed into a ``ContextInstance``.
    For every instance the function writes to:

    * ``args.resultsfile``                   -- full log (context, generated
      and, when available, ranked results),
    * ``args.resultsfile + '.ranked'``         -- one ``RANKED`` line per instance,
    * ``args.resultsfile + '.generated.oot'``  -- top ``args.topgenerated``
      generated substitutes,
    * ``args.resultsfile + '.generated.best'`` -- the single best generated
      substitute.

    Ranked output depends on a candidates file.  If ``args.candidatesfile``
    is None the ranking step is skipped (the ``.ranked`` file is created but
    left empty) rather than raising ``TypeError`` on the first instance.

    Args:
        inferrer: object providing ``find_inferred``, ``generate_inferred``,
            ``filter_inferred`` and ``msec_per_word``.
    """
    if args.candidatesfile is not None:
        target2candidates = read_candidates(args.candidatesfile)
    else:
        target2candidates = None

    lines = 0
    # ``with`` closes every handle even if processing an instance raises.
    with open(args.testfile, 'r') as tfi, \
            open(args.resultsfile, 'w') as tfo, \
            open(args.resultsfile + '.ranked', 'w') as tfo_ranked, \
            open(args.resultsfile + '.generated.oot', 'w') as tfo_generated_oot, \
            open(args.resultsfile + '.generated.best', 'w') as tfo_generated_best:
        for context_line in tfi:
            lst_instance = ContextInstance(context_line, args.no_pos)
            lines += 1
            if args.debug:
                tfo.write("\nTest context:\n")
                tfo.write("***************\n")

            tfo.write(lst_instance.decorate_context())

            # The log handle is passed along so find_inferred can write to it.
            result_vec = inferrer.find_inferred(lst_instance, tfo)

            generated_results = inferrer.generate_inferred(
                result_vec, lst_instance.target, lst_instance.target_lemma,
                lst_instance.pos)

            # Identifier prefix shared by all output lines of this instance.
            instance_key = ' '.join(
                [lst_instance.full_target_key, lst_instance.target_id])

            # The same text goes to the log and to the .oot file; build it once.
            oot_line = (
                instance_key + " ::: " +
                vec_to_str_generated(iter(generated_results.items()),
                                     args.topgenerated) + "\n")

            tfo.write("\nGenerated lemmatized results\n")
            tfo.write("***************\n")
            tfo.write("GENERATED\t" + oot_line)
            tfo_generated_oot.write(oot_line)
            tfo_generated_best.write(
                instance_key + " :: " +
                vec_to_str_generated(iter(generated_results.items()), 1) +
                "\n")

            # Without a candidates file there is no list to rank against.
            if target2candidates is not None:
                filtered_results = inferrer.filter_inferred(
                    result_vec, target2candidates[lst_instance.target_key],
                    lst_instance.pos)

                ranked_line = (
                    "RANKED\t" + instance_key + "\t" +
                    vec_to_str(iter(filtered_results.items()),
                               len(filtered_results)) + "\n")

                tfo.write("\nFiltered results\n")
                tfo.write("***************\n")
                tfo.write(ranked_line)
                tfo_ranked.write(ranked_line)

            # Progress report every 10 instances.
            if lines % 10 == 0:
                print("Read %d lines" % lines)

    print("Read %d lines in total" % lines)
    print("Time per word: %f msec" % inferrer.msec_per_word())
コード例 #3
0
def candidate_ranking_out(data_f, words2elmo_token, target_ws, sents,
                          position_lst, target_w2candidates, w2index, w2elmo,
                          vocab_all):
    """Write ranked, out-of-ten and best substitute files for each target.

    Three files are created in the current directory, named after the last
    path component of ``data_f`` plus ``'.' + model + 'vocab80000'`` and the
    suffixes ``.ranked``, ``.oot`` and ``.best`` (``model`` is a module-level
    name).

    For every row ``i`` of ``words2elmo_token`` the similarities between that
    row and all rows of ``w2elmo`` are paired with ``vocab_all`` and sorted in
    descending order; the sorted list is then passed to ``filter_inferred``
    (ranking against gold candidates) and ``generate_inferred``.

    Args:
        data_f: path of the data file; only its last ``/``-separated
            component is used, as the output file name prefix.
        words2elmo_token: per-instance vectors, one row per target instance.
        target_ws: per-instance target strings; the first whitespace-separated
            token is expected to look like ``lemma.pos[...]``.
        sents: per-instance token sequences.
        position_lst: per-instance index of the target within ``sents[i]``.
        target_w2candidates: mapping from ``lemma.pos`` to gold candidates.
        w2index: not used by this function.
        w2elmo: vocabulary vectors, rows aligned with ``vocab_all``.
        vocab_all: vocabulary words, aligned with the rows of ``w2elmo``.
    """
    out_prefix = data_f.split('/')[-1] + '.' + model + 'vocab80000'

    # ``with`` makes sure the three outputs are closed (and flushed) even if
    # ranking or generation raises for some instance.
    with open(out_prefix + '.ranked', 'w') as output_f_rank, \
            open(out_prefix + '.oot', 'w') as output_f_oot, \
            open(out_prefix + '.best', 'w') as output_f_best:
        print('normalizing vectors')
        w2elmo = normalize(w2elmo)
        words2elmo_token = normalize(words2elmo_token)
        # Row i holds the dot products of instance i with every vocabulary
        # vector (presumably cosine similarities, if ``normalize`` scales
        # rows to unit length -- confirm against its definition).
        sim_matrix = w2elmo.dot(words2elmo_token.T).T
        print('normalizing completed')

        for i in range(len(words2elmo_token)):
            # Progress report every 100 instances (skipping i == 0).
            if i % 100 == 0 and i >= 100:
                print(i)

            target_w_out = target_ws[i]
            target_w = target_w_out.split()[0]
            target_parts = target_w.split('.')
            target_w_lemma = target_parts[0]
            pos = target_parts[-1]

            # Shift the scores by +1 and halve them (maps [-1, 1] to [0, 1]).
            similarity = (sim_matrix[i] + 1.0) / 2
            result_vec = sorted(zip(vocab_all, similarity),
                                reverse=True,
                                key=lambda x: x[1])

            # Gold candidates are keyed by the first two dot-separated fields.
            try:
                candidates = target_w2candidates['.'.join(target_parts[:2])]
            except KeyError as e:
                print(
                    'target w does not occur in gold candidates list: {0}'
                    .format(e))
                continue

            # Ranked result.
            filtered_results = filter_inferred(result_vec, candidates, pos)
            output_f_rank.write(
                "RANKED\t" + target_w_out + "\t" +
                vec_to_str(filtered_results.items(), len(filtered_results)) +
                "\n")

            # Generated result: top 10 for .oot, top 1 for .best.
            generated_results = generate_inferred(result_vec,
                                                  sents[i][position_lst[i]],
                                                  target_w_lemma, pos)
            output_f_oot.write(
                target_w_out + " ::: " +
                vec_to_str_generated(generated_results.items(), 10) + "\n")
            output_f_best.write(
                target_w_out + " :: " +
                vec_to_str_generated(generated_results.items(), 1) + "\n")