def get_AB():
    """Write candidate "A B" word pairs from the test data to files/AB.tmp.

    The configuration is loaded with ``configure.cfg(sys.argv[1])``.  The
    stripped lines of ``files/test`` and ``files/one`` are each handed to
    ``parser.parse`` to build the test data and the training data.

    A pair is written (one ``"A B"`` line per pair) when:

    * ``A`` is a key of both ``test_data.hashwords`` and
      ``train_data.hashwords``;
    * ``B`` is the word stored under a pointer in ``test_data.lookup(A_ptr).data``
      whose associated count is greater than 1;
    * ``B`` is also a key of ``train_data.hashwords``.

    Pairs whose ``B`` cannot be resolved in ``test_data.wordshash`` or is
    missing from ``train_data.hashwords`` are skipped.  Any other error
    (for example a failed write) propagates to the caller.
    """
    c = configure.cfg(sys.argv[1])

    with open("files/test") as test_files:
        test_list = [x.strip() for x in test_files]
    test_data = parser.parse(c.dir, test_list, c.plus, c.minus, c.cluster)

    with open("files/one") as train_files:
        train_list = [x.strip() for x in train_files]
    train_data = parser.parse(c.dir, train_list, c.plus, c.minus, c.cluster)

    # Keep only the test words that are also known to the training data.
    # The key collection is fetched once instead of once per candidate word.
    train_words = train_data.hashwords.keys()
    items = [
        (word, ptr)
        for word, ptr in test_data.hashwords.items()
        if word in train_words
    ]

    with open("files/AB.tmp", "w") as out:
        for A, A_ptr in items:
            A_obj = test_data.lookup(A_ptr)
            for B_ptr, count in A_obj.data.items():
                if count > 1:
                    try:
                        B = test_data.wordshash[B_ptr]
                        # Lookup is used purely as an existence check:
                        # B must also be present in the training data.
                        train_data.hashwords[B]
                    except LookupError:
                        # Unknown pointer or word absent from training
                        # data: this pair is not a candidate.
                        continue
                    # The write lives outside the try so that I/O errors
                    # are reported instead of being silently dropped.
                    out.write(A + " " + B + "\n")
def prune_AB():
    """Filter files/AB.tmp into files/AB.final.

    The configuration is loaded with ``configure.cfg(sys.argv[1])`` and the
    stripped lines of ``files/ninetynine`` are passed to ``parser.parse``.
    Every line of ``files/AB.tmp`` is split into two words ``A`` and ``B``;
    both are translated through ``hashwords`` of the parsed data, and the
    line is copied unchanged to ``files/AB.final`` only when
    ``exists(A_ptr, B_ptr)`` on the parsed data equals 0.
    """
    c = configure.cfg(sys.argv[1])

    with open("files/ninetynine") as listing:
        entries = [raw.strip() for raw in listing]
    nine_data = parser.parse(c.dir, entries, c.plus, c.minus, c.cluster)

    word_to_ptr = nine_data.hashwords
    with open("files/AB.tmp") as pairs, open("files/AB.final", "w") as kept:
        for pair_line in pairs:
            first, second = pair_line.split()
            first_ptr = word_to_ptr[first]
            second_ptr = word_to_ptr[second]
            if nine_data.exists(first_ptr, second_ptr) != 0:
                # Pair is already present in the ninetynine data: drop it.
                continue
            kept.write(pair_line)
print("******************************************************") print("skew this machine answered " + str(skew_correct_answers[i]) + " out of "+ str(total)) print("that's " + str(skew_correct_answers[i]/total) +"%") print("there were " + str(skew_ties[i]) + " ties") print("******************************************************") with open(os.path.join("./results/", result_file), 'w') as out: out.write("using " + sys.argv[1] + " as conf file \n") out.write("took " + str(time() - start_time) + " seconds.\n") for i,k in enumerate(kl): out.write("\n** el one ** \n k: "+str(k)+"\ntotal: " + str(total) + "\n correct: " + str(el_correct_answers[i]) + "\n ties: " + str(el_ties[i])) out.write("\n** cos ** \n k: "+str(k)+"\ntotal: " + str(total) + "\n correct: " + str(cos_correct_answers[i]) + "\n ties: " + str(cos_ties[i])) out.write("\n** skew ** \n k: "+str(k)+"\n total: " + str(total) + "\n correct: " + str(skew_correct_answers[i]) + "\n ties: " + str(skew_ties[i])) train_data.shutdown() if __name__ == "__main__": c = configure.cfg(sys.argv[1]) for f in os.listdir("files"): if f[0] != '.': print("Running test with " + f + " as corpus file") with open(os.path.join('files/',f)) as fi: train_list = [x.strip() for x in fi] one_run(c, train_list, f)