Beispiel #1
0
def get_AB():
    """Write candidate ``A B`` word pairs to ``files/AB.tmp``.

    The configuration file path is taken from ``sys.argv[1]``.  The corpus
    files listed in ``files/test`` and ``files/one`` are parsed separately.
    For every word ``A`` present in the ``hashwords`` of both parses, each
    entry ``B`` stored under ``A`` in the test data with a count greater
    than 1 is written as one ``A B`` line, provided that ``B`` can be
    resolved through the test data's ``wordshash`` and is also found in the
    ``hashwords`` of the data parsed from ``files/one``.

    Pairs whose ``B`` cannot be looked up are skipped.  Any other error
    (for example a failed write) propagates to the caller instead of being
    silently discarded.
    """
    c = configure.cfg(sys.argv[1])

    # Read each listing completely, then parse once the file is closed.
    with open("files/test") as test_files:
        test_list = [x.strip() for x in test_files]
    test_data = parser.parse(c.dir, test_list, c.plus, c.minus, c.cluster)

    with open("files/one") as train_files:
        train_list = [x.strip() for x in train_files]
    train_data = parser.parse(c.dir, train_list, c.plus, c.minus, c.cluster)

    train_words = train_data.hashwords
    # Fetch the key collection once rather than once per candidate word.
    train_keys = train_words.keys()

    # Keep only the test words that also occur in the training data.
    items = [
        (word, ptr)
        for word, ptr in test_data.hashwords.items()
        if word in train_keys
    ]

    with open("files/AB.tmp", "w") as out:
        for A, A_ptr in items:
            A_obj = test_data.lookup(A_ptr)
            for B_ptr, count in A_obj.data.items():
                if count <= 1:
                    continue
                try:
                    B = test_data.wordshash[B_ptr]
                    # Subscript purely to check that B is known to the
                    # training data; the value itself is not needed.
                    train_words[B]
                except LookupError:
                    # B is unresolvable or absent from training: skip it.
                    continue
                # The write stays outside the try so I/O errors surface.
                out.write(A + " " + B + "\n")
Beispiel #2
0
def prune_AB():
    """Filter ``files/AB.tmp`` into ``files/AB.final``.

    The configuration file path is taken from ``sys.argv[1]``, and the
    corpus files listed in ``files/ninetynine`` are parsed with it.  Each
    line of ``files/AB.tmp`` is split into a pair ``A B``; the line is
    copied unchanged to ``files/AB.final`` only when ``exists`` on the
    parsed data returns 0 for the pointers of ``A`` and ``B``.
    """
    cfg = configure.cfg(sys.argv[1])

    with open("files/ninetynine") as listing:
        corpus_names = [entry.strip() for entry in listing]
        nine_data = parser.parse(
            cfg.dir, corpus_names, cfg.plus, cfg.minus, cfg.cluster
        )

    with open("files/AB.tmp") as candidates, \
            open("files/AB.final", "w") as kept:
        for pair_line in candidates:
            first, second = pair_line.split()
            first_ptr = nine_data.hashwords[first]
            second_ptr = nine_data.hashwords[second]
            found = nine_data.exists(first_ptr, second_ptr)
            # Only pairs reported as 0 by exists() survive the pruning.
            if found == 0:
                kept.write(pair_line)
Beispiel #3
0
            print("******************************************************")
            
            print("skew this machine answered " + str(skew_correct_answers[i]) + " out of "+ str(total))
            print("that's " + str(skew_correct_answers[i]/total) +"%")
            print("there were " + str(skew_ties[i]) + " ties")
            print("******************************************************")

        with open(os.path.join("./results/", result_file), 'w') as out:
            out.write("using " + sys.argv[1] + " as conf file \n")
            out.write("took " + str(time() - start_time) + " seconds.\n")
            for i,k in enumerate(kl):
                out.write("\n** el one ** \n k: "+str(k)+"\ntotal: " + str(total) + "\n correct: " + 
                        str(el_correct_answers[i]) + "\n ties: " + str(el_ties[i])) 
                out.write("\n** cos ** \n k: "+str(k)+"\ntotal: " + str(total) + "\n correct: " + 
                        str(cos_correct_answers[i]) + "\n ties: " + str(cos_ties[i])) 
                out.write("\n** skew ** \n k: "+str(k)+"\n total: " + str(total) + "\n correct: " + 
                        str(skew_correct_answers[i]) + "\n ties: " + str(skew_ties[i])) 

    train_data.shutdown()


if __name__ == "__main__":
    # Load the configuration named on the command line once; it is reused
    # for every run below.
    c = configure.cfg(sys.argv[1])

    # Each non-hidden entry of the "files" directory is read as a listing
    # (one stripped entry per line) and handed to one_run together with
    # the entry's own name.
    for f in os.listdir("files"):
        if f[0] == '.':
            continue
        print("Running test with " + f + " as corpus file")
        with open(os.path.join('files/', f)) as fi:
            train_list = [line.strip() for line in fi]
        one_run(c, train_list, f)