Beispiel #1
0
def main(training_data, test_data, output_file):
    """Extract features, run the classifier and write the classification.

    Two modes, selected by the truthiness of ``test_data``:

    * ``test_data`` given: features of both data sets are written to
      ``train.tab`` and ``test.tab`` and ``tweaked_on_testdata`` is run on
      that pair of files.  No accuracy is reported in this mode.
    * ``test_data`` empty/None: only ``train.tab`` is written, ``tweaked``
      is run on it (cross-validation), and the accuracy returned by
      ``evaluate(training_data, output_file)`` is printed afterwards.

    In both modes the results are passed to ``classify_results`` with the
    fixed value 0.5 (presumably the decision threshold -- confirm against
    ``classify_results``) and the outcome is written to ``output_file``.

    Side effects: overwrites ``train.tab`` (and ``test.tab``) in the
    current working directory and prints progress messages to stdout.
    """
    # Single-argument parenthesized prints behave identically under the
    # Python 2 print statement and the Python 3 print function.
    training_features = get_features(training_data)

    if test_data:
        test_features = get_features(test_data)
        write_f("train.tab", training_features)
        write_f("test.tab", test_features)
        results = tweaked_on_testdata("train.tab", "test.tab")
    else:
        write_f("train.tab", training_features)
        results = tweaked("train.tab")  # cross-validation
        print("classifying")

    # Shared tail: previously duplicated per branch, which is how the
    # "witing output" typo crept into only one of the two copies.
    classification = classify_results(results, 0.5)
    print("writing output")
    write(classification, output_file)

    if not test_data:
        # Accuracy is only reported in the cross-validation mode, where the
        # written output is compared against the training data.
        print("Accuracy = %.4f" % evaluate(training_data, output_file))
Beispiel #2
0
Datei: rte.py Projekt: laat/ex3
def main(tree, output, method, threshold, find_best, n=4, idf_enabled=False):
    """Run one entailment method end to end and report its accuracy.

    ``tree`` arrives as the raw input handed over by the caller and is
    rebound to a ``(prepared_input, raw_input)`` pair whose first element
    depends on ``method``:

    * ``word`` / ``lemma`` / ``bleu``: ``load_xml.get_pairs(tree)``
    * ``print_ted`` / ``ted``: ``create_tree.generate_syntax_tree(tree)``
    * ``knn`` / ``knn-xv``: the raw input itself
    * ``features``: no pair is built; the features are written to
      ``output`` and the function returns immediately.

    With ``idf_enabled`` the IDF scores are generated from the loaded
    pairs for the first two groups.

    If ``find_best`` is truthy the work is delegated to
    ``find_best_threshold`` and nothing else happens.  Otherwise the
    callable ``METHODS[method]`` is run, its results are classified with
    ``threshold``, written to ``output`` and the accuracy from
    ``evaluate`` is printed.  ``print_ted`` stops right after the method
    has run, without classifying anything.
    """
    uses_knn = method in ("knn", "knn-xv")

    # --- stage 1: load the input (and the idf scores when requested) ---
    if method in ("word", "lemma", "bleu"):
        print("Loading xmlfile")
        pairs = load_xml.get_pairs(tree)
        tree = (pairs, tree)
        print("done.")

        if idf_enabled:
            generate_idf_score(pairs)

    elif method in ("print_ted", "ted"):
        print("Loading xmlfile")
        syntax_trees = create_tree.generate_syntax_tree(tree)
        tree = (syntax_trees, tree)
        print("done.")

        if idf_enabled:
            # The idf scores are built from the sentence pairs, so they
            # are loaded from the raw input kept in the second slot.
            generate_idf_score(load_xml.get_pairs(tree[1]))

    elif method == "features":
        # Feature export only: nothing to classify afterwards.
        write_features(output, get_features(tree, idf_enabled))
        return

    elif uses_knn:
        tree = (tree, tree)

    # --- stage 2: run the selected method ---
    if find_best:
        find_best_threshold(tree[0], METHODS[method], tree[1],
                            output, n=n, idf_enabled=idf_enabled)
        return

    if uses_knn:
        # The knn methods read their input from a feature file on disk.
        knn_features = get_features(tree[0], idf_enabled=idf_enabled)
        write_features("features.tab", knn_features)
        results = METHODS[method](None, outfile="features.tab")
    else:
        results = METHODS[method](tree[0], n=n, idf_enabled=idf_enabled,
                                  output=output)

    if method == "print_ted":
        return

    classification = classify_results(results, threshold)

    print("writing output")
    write(classification, output)
    print("Accuracy = %.4f" % evaluate(tree[1], output))