if not ds.parser.kb.server_available: logger.error("Server is not available. Please check the endpoint at: {}".format(ds.parser.kb.endpoint)) sys.exit(0) output_file = 'lcquadtestanswer_output' linker = Earl(path="data/LC-QUAD/entity_lcquad_test.json") base_dir = "./output" question_type_classifier_path = os.path.join(base_dir, "question_type_classifier") double_relation_classifier_path = os.path.join(base_dir, "double_relation_classifier") utility.makedirs(question_type_classifier_path) utility.makedirs(double_relation_classifier_path) question_type_classifier = SVMClassifier(os.path.join(question_type_classifier_path, "svm.model")) double_relation_classifier = SVMClassifier(os.path.join(double_relation_classifier_path, "svm.model")) stats = Stats() parser = LC_QaudParser() kb = parser.kb o = Orchestrator(logger, question_type_classifier, double_relation_classifier, parser, question_type_classifier_path, True) tmp = [] output = [] na_list = [] for qapair in ds.qapairs: stats.inc("total") output_row = {"question": qapair.question.text, "id": qapair.id, "query": qapair.sparql.query,
def _print_answer_breakdown(title, stats_overall, stats_per_answer):
    """Print one report section and return its answer labels in sorted order.

    Args:
        title: section caption printed after a row of ten dashes.
        stats_overall: counter indexed by answer label (overall item counts).
        stats_per_answer: dict mapping answer label -> per-feature counter.

    Returns:
        The answer labels of ``stats_per_answer`` as a sorted list.
    """
    print("-" * 10 + " " + title)
    answers = sorted(stats_per_answer)
    for answer in answers:
        # The explicit " " reproduces the separator the Python 2 print
        # statement inserted between its two arguments.
        print("{}: {} -- ".format(answer, stats_overall[answer]) + " " + str(stats_per_answer[answer]))
    return answers


def bar_chart_per_feature(input_json):
    """Print per-feature answer statistics and show them as a bar chart.

    Every element of ``input_json`` is counted under ``"total"``. Elements
    that carry an ``"answer"`` key are additionally counted per answer label,
    and each entry of their ``"features"`` is counted both overall and per
    answer label. Answer labels starting with ``"-"`` are reported under
    "not covered"; all other labels are reported under "covered" and drawn as
    stacked bars over a red bar that shows the overall count of each feature.

    Args:
        input_json: iterable of dict-like items. When ``"answer"`` is present,
            ``item["answer"]`` must support ``startswith`` and
            ``item["features"]`` must be iterable.

    Returns:
        None. The report goes to stdout and the chart is shown via
        ``plt.show()``.
    """
    # NOTE(review): Stats is assumed to return 0 for keys that were never
    # incremented (a fresh Stats() is indexed below) -- confirm against the
    # Stats implementation.
    stats_overall = Stats()
    stats_features = Stats()
    stats_with_answer = {}
    stats_without_answer = {}

    for item in input_json:
        stats_overall.inc("total")
        if "answer" not in item:
            continue
        answer = item["answer"]
        stats_overall.inc(answer)
        for feature in item["features"]:
            stats_features.inc(feature)
            # Labels starting with "-" are the ones reported as "not covered".
            if answer.startswith("-"):
                bucket = stats_without_answer
            else:
                bucket = stats_with_answer
            if answer not in bucket:
                bucket[answer] = Stats()
            bucket[answer].inc(feature)

    print(stats_features)
    stats_with_answer_keys = _print_answer_breakdown("covered", stats_overall, stats_with_answer)
    _print_answer_breakdown("not covered", stats_overall, stats_without_answer)
    print("-" * 100)

    # Freeze the feature order once so tick labels and bar heights line up
    # (and so this also works where dict.keys() is a view rather than a list).
    keys = list(stats_features.dict.keys())
    ind = list(range(len(keys)))
    last = Stats()  # running per-feature height of the bars stacked so far
    plt_idx = []
    colors = ["green", "yellowgreen", "lightgreen", "lime", "olive"]
    plt.figure()
    ax = plt.subplot(111)
    overall = [stats_features[key] for key in keys]
    p0 = ax.bar(ind, overall, 0.35, color='red')
    for color_id, answer in enumerate(stats_with_answer_keys):
        per_answer = stats_with_answer[answer]
        answered = [per_answer[key] for key in keys]
        bottoms = [last[key] for key in keys]
        # Cycle through the palette so more than len(colors) answer labels
        # no longer raise IndexError.
        bar_color = colors[color_id % len(colors)]
        plt_idx.append(ax.bar(ind, answered, 0.2, color=bar_color, bottom=bottoms))
        last.dict = {key: per_answer[key] + last[key] for key in keys}

    plt.xticks(ind, keys, rotation='vertical')
    # Leave room on the right for the legend placed outside the axes.
    plt.subplots_adjust(bottom=0.2, left=0.1, right=0.7)
    ax.legend([p0] + [bars[0] for bars in plt_idx],
              ["All"] + list(stats_with_answer_keys),
              loc='center left', bbox_to_anchor=(1, 0.5))
    plt.show()
def default(ds, id_to_include=None, n=-1):
    """Collect summary counters over the result rows in ``ds``.

    Counters written to the returned ``Stats`` object:
      * ``"total"`` -- number of rows considered;
      * one counter per distinct ``data["answer"]`` value;
      * ``"has_queries"`` -- rows with at least one generated query;
      * ``"generated_queries"`` -- sum of ``len(data["generated_queries"])``;
      * ``"max_generated_queries"`` -- largest number of generated queries
        seen on a single row (starts at 0);
      * ``"more_than_three_queries"`` -- rows with more than three queries.

    Args:
        ds: iterable of dict-like rows; each row must have an ``"id"`` key
            when ``id_to_include`` is non-empty.
        id_to_include: optional collection of ids; when ``None`` or empty
            every row is considered, otherwise only rows whose ``"id"`` is
            in the collection.
        n: stop after this many considered rows. Values that ``"total"``
            never reaches (such as the default ``-1``) mean "no limit".

    Returns:
        The populated ``Stats`` object.
    """
    # None replaces the former mutable ``[]`` default; both mean "no filter".
    include_all = id_to_include is None or len(id_to_include) == 0

    stat = Stats()
    stat["max_generated_queries"] = 0
    for data in ds:
        if not include_all and data["id"] not in id_to_include:
            continue

        stat.inc("total")
        if "answer" in data:
            stat.inc(data["answer"])

        if "generated_queries" in data:
            number_of_queries = len(data["generated_queries"])
            if number_of_queries > 0:
                stat.inc("has_queries")
            stat.inc("generated_queries", number_of_queries)
            if number_of_queries > stat["max_generated_queries"]:
                stat["max_generated_queries"] = number_of_queries
            if number_of_queries > 3:
                stat.inc("more_than_three_queries")

        # Check the limit only after the row is fully tallied, so that
        # "total" and the query counters always cover the same rows.
        if stat["total"] == n:
            break
    return stat
miss_match = False for item in list1: target_uri = item.uris[0] found = False for e2_item in list2: if target_uri in e2_item.uris: found = True break if not found: miss_match = True break return miss_match if __name__ == "__main__": stats = Stats() ds = LC_Qaud_Linked("../data/LC-QUAD/linked.json") ds.load() ds.parse() goldLinker = GoldLinker() # earl = Earl("../data/LC-QUAD/EARL/output.json") earl = Earl("../data/LC-QUAD/TagMeRelnliod/output_2300.json") for qapair in tqdm(ds.qapairs): e1, r1 = goldLinker.do(qapair) e2, r2 = earl.do(qapair, force_gold=False, top=100) if e2 is None and r2 is None: continue
import argparse

from common.utility.stats import Stats

# Report, for each data split of the ranking model, the share of lines in
# sim.txt labelled "1" versus "2".
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Analyse the input of ranking model')
    parser.add_argument("--file", help="file name to load the results",
                        default="tmp", dest="file_name")
    args = parser.parse_args()

    base_path = "../learning/treelstm/data/"
    sets = ["train", "dev", "test"]
    for item in sets:
        stats = Stats()
        sim_path = "{}{}/{}/sim.txt".format(base_path, args.file_name, item)
        with open(sim_path) as file_reader:
            # Count each line's content (without its newline) as a label.
            for line in file_reader:
                stats.inc(line.replace("\n", ""))

        print(item)
        # NOTE(review): assumes Stats returns 0 for labels that never
        # occurred -- confirm against the Stats implementation.
        total = 0.0 + stats["1"] + stats["2"]
        if total == 0:
            # An empty split, or one without "1"/"2" labels, would otherwise
            # abort the whole run with ZeroDivisionError.
            print("no lines labelled 1 or 2")
            continue
        print("{} {}".format(stats["1"] / total, stats["2"] / total))
from parser.lc_quad_linked import LC_Qaud_Linked import json from kb.dbpedia import DBpedia from parser.answerparser import AnswerParser from common.utility.stats import Stats from common.container.answerset import AnswerSet from tqdm import tqdm stats = Stats() with open("../output/nliwod_origin.json") as data_file: nliwod = json.load(data_file) ds = LC_Qaud_Linked(path="../data/LC-QUAD/linked_answer6.json") ds.load() ds.parse() kb = DBpedia() parser = AnswerParser(kb) i = 0 for qapair in tqdm(ds.qapairs): nliwod_row = nliwod[i] if qapair.id == nliwod_row["q_id"]: query = nliwod_row["candidate"] raw_result = kb.query(query) result = AnswerSet(raw_result[1], parser.parse_queryresult) nliwod_row["answer"] = raw_result[1] if qapair.answerset == result: stats.inc("correct") nliwod_row["correct"] = True else: