import cPickle

from numpy.testing import assert_almost_equal

# `config` and `accuracy_score` are provided elsewhere in the project
# (the test configuration and the scoring module).

def test_accuracy():
    # A pickled sample of translation graphs and the matching lemma
    # reference file for the German-English (de-en) pair.
    graphs_fname = config["test_data_dir"] + "/graphs_sample_out_de-en.pkl"
    graphs = cPickle.load(open(graphs_fname))
    ref_fname = config["test_data_dir"] + "/lemma_sample_out_de-en.ref"
    # Score only the first graph against the reference, using the
    # frequency-based score attribute.
    result = accuracy_score(graphs[:1], ref_fname, "freq_score")
    assert result.correct == 5
    assert result.incorrect == 1
    assert result.ignored == 1
    assert_almost_equal(result.score, 0.8333333)
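# The assertions above imply that accuracy_score() excludes ignored
# items from the denominator: score = correct / (correct + incorrect),
# i.e. 5 / (5 + 1) ~= 0.8333. A minimal sketch of a result object with
# that contract; the namedtuple and helper below are hypothetical
# illustrations, not the project's actual implementation.
from collections import namedtuple

AccuracyResult = namedtuple(
    "AccuracyResult", ["correct", "incorrect", "ignored", "score"])

def make_accuracy_result(correct, incorrect, ignored):
    # Ignored items do not count toward the denominator, matching the
    # expected values asserted in test_accuracy().
    scored = correct + incorrect
    score = float(correct) / scored if scored else 0.0
    return AccuracyResult(correct, incorrect, ignored, score)

# 5 correct, 1 incorrect, 1 ignored -> score = 5/6 ~= 0.8333333
print(make_accuracy_result(5, 1, 1).score)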
("incorrect", "i"), ("ignored", "i"), ("accuracy", "f"), ("graphs", "i"), ("nist", "f"), ("bleu", "f"), ("exp_name", "S128"), ] new_results = np.zeros(len(old_results), descriptor) for i, exp in enumerate(old_results): ref_fname = config["eval"][exp["data"]][exp["source"] + "-" + exp["target"]]["lemma_ref_fname"] graphs_fname = "_{}/{}_graphs.pkl".format(name, exp["exp_name"]) graphs = cPickle.load(open(graphs_fname)) accuracy = accuracy_score(graphs, ref_fname, name + "_score") new_results[i]["graphs"] = len(graphs) new_results[i]["data"] = exp["data"] new_results[i]["source"] = exp["source"] new_results[i]["target"] = exp["target"] new_results[i]["min_count"] = exp["min_count"] new_results[i]["max_freq"] = exp["max_freq"] new_results[i]["correct"] = accuracy.correct new_results[i]["incorrect"] = accuracy.incorrect new_results[i]["accuracy"] = accuracy.score new_results[i]["ignored"] = accuracy.ignored new_results[i]["nist"] = exp["nist"] new_results[i]["bleu"] = exp["bleu"] new_results[i]["exp_name"] = exp["exp_name"] np.save("_" + name + "-acc.npy", new_results)