Example #1
0
def find_best_threshold(tree, method, input_file, output_file, n=4, idf_enabled=False):
    """Sweep thresholds 0.01..1.00 and report the one with the highest accuracy.

    The scoring method is run once; its results are then classified with
    every threshold in steps of 0.01 and each classification is scored with
    ``evaluate`` against the reference parsed from ``input_file``.

    Args:
        tree: input handed to ``method`` for the non-kNN methods.
        method: scoring callable. When it is ``knn_classifier`` or
            ``knn_classifier_xv`` the features of ``input_file`` are written
            to ``tmp.tab`` and the callable is run on that file instead.
        input_file: file the reference labels (and kNN features) come from.
        output_file: forwarded to ``method`` and ``evaluate``.
        n: forwarded to the non-kNN ``method``.
        idf_enabled: forwarded to ``get_features`` / ``method``.

    Returns:
        Tuple ``(best_threshold, best_accuracy)``. On ties the highest
        threshold wins, because later thresholds replace earlier ones.
    """
    if method in [knn_classifier, knn_classifier_xv]:
        features = get_features(input_file, idf_enabled)
        write_features("tmp.tab", features)
        # Run the classifier that was actually requested; calling
        # knn_classifier unconditionally ignored knn_classifier_xv.
        results = method(None, outfile="tmp.tab")
    else:
        results = method(tree, output=output_file, n=n, idf_enabled=idf_enabled)

    # Parse the reference once instead of once per threshold.
    reference = parse_reference(input_file)

    best_threshold = 0.01
    best_accuracy = 0
    # Integer steps avoid the rounding drift of repeatedly adding 0.01.
    for step in range(1, 101):
        threshold = step / 100.0
        classification = classify_results(results, threshold)
        acc = evaluate(input_file, output_file,
                       pred_id2label=classification, ref_id2label=reference)
        print("th: %s acc: %s" % (threshold, acc))
        if acc >= best_accuracy:
            best_threshold = threshold
            best_accuracy = acc

    print("best threshold was %.2f with %.4f accuracy" % (best_threshold,
                                                          best_accuracy))
    return best_threshold, best_accuracy
Example #2
0
def predict():
    """Combine four score lists into weighted YES/NO predictions and evaluate.

    Score lists are read with ``read_file`` from ``bleuresults.txt``,
    ``wordmatches.txt``, ``lemma_matches.txt`` and ``pos-tag_matches.txt``.
    For every index of the BLEU list, each score above its module-level
    threshold adds its module-level weight (``*_result``) to the YES tally,
    each score below it adds the weight to the NO tally, and a score equal
    to the threshold adds nothing. The pair is written as YES only when the
    YES tally is strictly larger.

    Predictions go to ``finalresult.txt`` (header ``ranked: no`` followed by
    one ``<index> YES|NO`` line per pair, indices starting at 1), and the
    result of ``eval_rte.evaluate`` on that file is printed.

    Raises:
        IOError: if any of the files cannot be opened.
        IndexError: if another score list is shorter than the BLEU list.
    """
    def load(path):
        # ``with`` guarantees the handle is closed even if read_file raises.
        with open(path) as handle:
            return read_file(handle)

    bleu = load("bleuresults.txt")
    word = load("wordmatches.txt")
    lemma = load("lemma_matches.txt")
    pos = load("pos-tag_matches.txt")

    print("%s %s %s %s" % (len(bleu), len(word), len(lemma), len(pos)))

    # (scores, threshold, weight) for every voter, in the original order.
    voters = (
        (bleu, bleu_threshold, bleu_result),
        (word, word_threshold, word_result),
        (lemma, lemma_threshold, lemma_result),
        (pos, pos_threshold, pos_result),
    )

    # open() raises on failure, so no truthiness check on the handle is needed.
    with open("finalresult.txt", "wb") as out:
        out.write("ranked: no\n")
        for i in range(len(bleu)):
            yes = 0
            no = 0
            for scores, threshold, weight in voters:
                if scores[i] > threshold:
                    yes += weight
                elif scores[i] < threshold:
                    no += weight
            out.write("%s %s\n" % (i + 1, "YES" if yes > no else "NO"))

    match = eval_rte.evaluate("RTE2_dev.xml", "finalresult.txt")
    print("%.4f" % match)
Example #3
0
File: rte.py Project: laat/ex3
def main(tree, output, method, threshold, find_best, n=4, idf_enabled=False):
    """Prepare the input for ``method``, then search for a threshold or classify.

    ``tree`` arrives as the input file. Depending on ``method`` it is paired
    with a prepared form of itself: parsed pairs for word/lemma/bleu, syntax
    trees for ted/print_ted, or the file itself for knn/knn-xv. The
    "features" method only writes the feature file to ``output`` and returns.

    With ``find_best`` set, the work is delegated to ``find_best_threshold``.
    Otherwise the method is run, its results are classified with
    ``threshold``, written to ``output`` and the accuracy is printed
    ("print_ted" stops right after running the method).
    """
    uses_knn = method in ("knn", "knn-xv")

    # prepared[0] is what the method consumes, prepared[1] the original file.
    prepared = tree
    if method in ("word", "lemma", "bleu"):
        print("Loading xmlfile")
        prepared = (load_xml.get_pairs(tree), tree)
        print("done.")
        if idf_enabled:
            generate_idf_score(prepared[0])
    elif method in ("print_ted", "ted"):
        print("Loading xmlfile")
        prepared = (create_tree.generate_syntax_tree(tree), tree)
        print("done.")
        if idf_enabled:
            # idf scores are built from the pairs, not from the syntax trees
            generate_idf_score(load_xml.get_pairs(prepared[1]))
    elif method == "features":
        write_features(output, get_features(tree, idf_enabled))
        return
    elif uses_knn:
        prepared = (tree, tree)

    if find_best:
        find_best_threshold(prepared[0], METHODS[method], prepared[1],
                            output, n=n, idf_enabled=idf_enabled)
        return

    if uses_knn:
        # the kNN methods read their input from a feature file
        knn_features = get_features(prepared[0], idf_enabled=idf_enabled)
        write_features("features.tab", knn_features)
        results = METHODS[method](None, outfile="features.tab")
    else:
        results = METHODS[method](prepared[0], n=n, idf_enabled=idf_enabled,
                                  output=output)

    if method == "print_ted":
        return

    labels = classify_results(results, threshold)
    print("writing output")
    write(labels, output)
    print("Accuracy = %.4f" % evaluate(prepared[1], output))
Example #4
0
File: tweaked.py Project: laat/ex3
def main(training_data, test_data, output_file):
    """Train on ``training_data`` and write classifications to ``output_file``.

    Features of ``training_data`` are always written to ``train.tab``.

    * With ``test_data``: its features are written to ``test.tab`` and
      ``tweaked_on_testdata`` is run on both files.
    * Without ``test_data``: ``tweaked`` is run on ``train.tab`` alone
      (cross-validation), and after writing the output the accuracy against
      ``training_data`` is printed.

    In both cases the results are classified with a fixed threshold of 0.5.
    """
    training_features = get_features(training_data)

    if test_data:
        test_features = get_features(test_data)
        write_f("train.tab", training_features)
        write_f("test.tab", test_features)
        results = tweaked_on_testdata("train.tab", "test.tab")
    else:
        write_f("train.tab", training_features)
        results = tweaked("train.tab")  # cross-validation
        print("classifying")

    classification = classify_results(results, 0.5)

    # Message previously read "witing output" in the test-data branch.
    print("writing output")
    write(classification, output_file)

    if not test_data:
        print("Accuracy = %.4f" % evaluate(training_data, output_file))
Example #5
0
def predict(step_size, name):
    """Search for the score threshold that gives the best evaluation result.

    One float score per line is read from the file ``name``. Thresholds
    ``step_size``, ``2 * step_size``, ... are tried, up to and including the
    first one that is at least 1. For each threshold, ``predictions.txt`` is
    rewritten (header ``ranked: no`` followed by one ``<index> YES|NO`` line
    per score, indices starting at 1, YES when the score is strictly above
    the threshold) and scored with ``eval_rte.evaluate``.

    Args:
        step_size: distance between thresholds; anything ``float`` accepts.
        name: path of the score file.

    Returns:
        Tuple ``(best_match, match_threshold)``; the same values are printed.
        On ties the lowest threshold is kept.

    Raises:
        ValueError: if ``step_size`` is not a positive number (the search
            would otherwise never terminate) or a score line is not a float.
        IOError: if a file cannot be opened.
    """
    step_size = float(step_size)
    if not step_size > 0:  # also rejects NaN
        raise ValueError("step_size must be positive, got %r" % step_size)

    # open() raises on failure, so no truthiness check on the handle is needed.
    with open(name) as source:
        scores = [float(line) for line in source]

    best_match = 0
    match_threshold = 0
    threshold = 0
    step = 0
    while threshold < 1:
        # Multiply instead of accumulating so rounding errors do not pile up
        # and push the search one step past 1.
        step += 1
        threshold = step * step_size

        with open("predictions.txt", "wb") as out:
            out.write("ranked: no\n")
            for i, score in enumerate(scores):
                out.write("%s %s\n" % (i + 1, "YES" if score > threshold else "NO"))

        match = eval_rte.evaluate("RTE2_dev.xml", "predictions.txt")
        if match > best_match:
            best_match = match
            match_threshold = threshold

    print("Best match : %.4f match threshold : %.4f" % (best_match, match_threshold))
    return best_match, match_threshold