Example #1
0
    if not ds.parser.kb.server_available:
        logger.error("Server is not available. Please check the endpoint at: {}".format(ds.parser.kb.endpoint))
        sys.exit(0)

    output_file = 'lcquadtestanswer_output'
    linker = Earl(path="data/LC-QUAD/entity_lcquad_test.json")

    base_dir = "./output"
    question_type_classifier_path = os.path.join(base_dir, "question_type_classifier")
    double_relation_classifier_path = os.path.join(base_dir, "double_relation_classifier")
    utility.makedirs(question_type_classifier_path)
    utility.makedirs(double_relation_classifier_path)
    question_type_classifier = SVMClassifier(os.path.join(question_type_classifier_path, "svm.model"))
    double_relation_classifier = SVMClassifier(os.path.join(double_relation_classifier_path, "svm.model"))

    stats = Stats()

    parser = LC_QaudParser()
    kb = parser.kb

    o = Orchestrator(logger, question_type_classifier, double_relation_classifier, parser, question_type_classifier_path, True)

    tmp = []
    output = []
    na_list = []

    for qapair in ds.qapairs:
        stats.inc("total")
        output_row = {"question": qapair.question.text,
                      "id": qapair.id,
                      "query": qapair.sparql.query,
Example #2
0
def bar_chart_per_feature(input_json):
    """Print per-feature answer statistics and plot them as a bar chart.

    Every element of ``input_json`` must provide a ``"features"`` iterable
    and may provide an ``"answer"`` string.  Answers starting with ``"-"``
    are reported under "not covered", all other answers under "covered".
    Elements without an ``"answer"`` key still count towards the overall
    and per-feature totals but are left out of the per-answer breakdown.

    The chart shows one red bar per feature with the overall count and, on
    top of it, narrower stacked bars for each covered answer.

    Args:
        input_json: iterable of mapping-like records as described above.

    Returns:
        None.  The statistics are printed and the figure is shown via
        ``plt.show()``.
    """
    # NOTE(review): this relies on Stats returning a zero-like value for keys
    # that were never incremented (see the `last[key]` lookups below) and on
    # it exposing its counters as `.dict` -- confirm against the Stats class.
    stats_overall = Stats()
    stats_features = Stats()
    stats_with_answer = dict()
    stats_without_answer = dict()
    for record in input_json:
        stats_overall.inc("total")
        has_answer = "answer" in record
        if has_answer:
            stats_overall.inc(record["answer"])
        for feature in record["features"]:
            stats_features.inc(feature)
            if not has_answer:
                # Without an answer there is no bucket to attribute the
                # feature to; previously this raised KeyError.
                continue
            answer = record["answer"]
            if answer.startswith("-"):
                bucket = stats_without_answer
            else:
                bucket = stats_with_answer
            if answer not in bucket:
                bucket[answer] = Stats()
            bucket[answer].inc(feature)

    # Single-argument print(...) calls produce the same output under both
    # Python 2 and Python 3.
    print(stats_features)
    print(" ".join(["-" * 10, "covered"]))
    stats_with_answer_keys = sorted(stats_with_answer)
    for answer in stats_with_answer_keys:
        print("{}: {} -- ".format(answer, stats_overall[answer])
              + " " + str(stats_with_answer[answer]))
    print(" ".join(["-" * 10, "not covered"]))
    stats_without_answer_keys = sorted(stats_without_answer)
    for answer in stats_without_answer_keys:
        print("{}: {} -- ".format(answer, stats_overall[answer])
              + " " + str(stats_without_answer[answer]))
    print("-" * 100)

    # Materialise keys/positions as lists so they can be reused several times.
    keys = list(stats_features.dict.keys())
    ind = list(range(len(keys)))
    last = Stats()
    plt_idx = []
    colors = ["green", "yellowgreen", "lightgreen", "lime", "olive"]

    plt.figure()
    ax = plt.subplot(111)

    overall = [stats_features[key] for key in keys]
    p0 = ax.bar(ind, overall, 0.35, color='red')

    for color_id, answer in enumerate(stats_with_answer_keys):
        answered = [stats_with_answer[answer][key] for key in keys]
        # Heights accumulated so far form the baseline of this layer.
        bottoms = [last[key] for key in keys]
        plt_idx.append(
            ax.bar(ind, answered, 0.2,
                   # Cycle the palette instead of failing when there are more
                   # covered answers than colors.
                   color=colors[color_id % len(colors)],
                   bottom=bottoms))
        last.dict = {key: stats_with_answer[answer][key] + last[key]
                     for key in keys}

    plt.xticks(ind, keys, rotation='vertical')
    plt.subplots_adjust(bottom=0.2, left=0.1, right=0.7)
    ax.legend([p0] + [bars[0] for bars in plt_idx],
              ["All"] + list(stats_with_answer_keys),
              loc='center left',
              bbox_to_anchor=(1, 0.5))
    plt.show()
Example #3
0
def default(ds, id_to_include=None, n=-1):
    """Collect summary counters over the records in ``ds``.

    Counters written to the returned ``Stats`` object:

    * ``"total"`` -- number of records considered;
    * one counter per distinct ``data["answer"]`` value;
    * ``"has_queries"`` -- records with at least one generated query;
    * ``"generated_queries"`` -- sum of generated-query counts;
    * ``"max_generated_queries"`` -- largest generated-query count seen;
    * ``"more_than_three_queries"`` -- records with more than three queries.

    Args:
        ds: iterable of mapping-like records; ``"id"`` is read only when a
            filter is given, ``"answer"`` and ``"generated_queries"`` are
            optional.
        id_to_include: collection of ids to restrict the statistics to.
            ``None`` or an empty collection means "no filtering".
        n: stop after this many considered records.  With the default of
            ``-1`` the limit is never reached.

    Returns:
        The populated ``Stats`` object.
    """
    if id_to_include is None:
        # Avoid a shared mutable default argument.
        id_to_include = ()

    stat = Stats()
    stat["max_generated_queries"] = 0
    for data in ds:
        if len(id_to_include) > 0 and data["id"] not in id_to_include:
            continue

        stat.inc("total")
        if "answer" in data:
            stat.inc(data["answer"])

        if "generated_queries" in data:
            number_of_queries = len(data["generated_queries"])
            if number_of_queries > 0:
                stat.inc("has_queries")
                stat.inc("generated_queries", number_of_queries)
                if number_of_queries > stat["max_generated_queries"]:
                    stat["max_generated_queries"] = number_of_queries
                if number_of_queries > 3:
                    stat.inc("more_than_three_queries")

        # Check the limit only after the record is fully tallied; breaking
        # earlier left the n-th record counted in "total" but missing from
        # the generated-query counters.
        if stat["total"] == n:
            break

    return stat
Example #4
0
    miss_match = False
    for item in list1:
        target_uri = item.uris[0]
        found = False
        for e2_item in list2:
            if target_uri in e2_item.uris:
                found = True
                break
        if not found:
            miss_match = True
            break
    return miss_match


if __name__ == "__main__":
    # Script entry point: runs two linkers over every question/answer pair
    # of the dataset loaded from linked.json.
    # NOTE(review): `stats`, `e1` and `r1` are not used in the lines shown
    # here -- presumably the comparison follows later in the loop; confirm.
    stats = Stats()

    # Load the dataset file and parse it so that `ds.qapairs` is available.
    ds = LC_Qaud_Linked("../data/LC-QUAD/linked.json")
    ds.load()
    ds.parse()

    goldLinker = GoldLinker()
    # Alternative linker output, kept for switching inputs by hand:
    # earl = Earl("../data/LC-QUAD/EARL/output.json")
    earl = Earl("../data/LC-QUAD/TagMeRelnliod/output_2300.json")

    for qapair in tqdm(ds.qapairs):
        # Each linker returns a pair of results for the question
        # (presumably linked entities and relations -- TODO confirm).
        e1, r1 = goldLinker.do(qapair)
        e2, r2 = earl.do(qapair, force_gold=False, top=100)
        # Skip questions for which the Earl linker returned nothing at all.
        if e2 is None and r2 is None:
            continue
import argparse
from common.utility.stats import Stats

if __name__ == "__main__":
    # Report, for each data split, the share of "1" and "2" labels found in
    # that split's sim.txt file.
    parser = argparse.ArgumentParser(
        description='Analyse the input of ranking model')
    parser.add_argument("--file",
                        help="file name to load the results",
                        default="tmp",
                        dest="file_name")
    args = parser.parse_args()

    # The analysis lives inside the __main__ guard because it depends on
    # `args`; at module level it raised NameError whenever the file was
    # imported instead of executed.
    base_path = "../learning/treelstm/data/"
    sets = ["train", "dev", "test"]
    for item in sets:
        stats = Stats()
        with open("{}{}/{}/sim.txt".format(base_path, args.file_name,
                                           item)) as file_reader:
            # Iterate the file lazily; each line (minus its newline) is a label.
            for line in file_reader:
                stats.inc(line.replace("\n", ""))
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(item)
        # Adding 0.0 forces float division on Python 2 as well.
        total = 0.0 + stats["1"] + stats["2"]
        if total:
            print(str(stats["1"] / total) + " " + str(stats["2"] / total))
        else:
            # No "1"/"2" labels in this split: the shares are undefined, so
            # report that instead of failing with ZeroDivisionError.
            print("nan nan")
Example #6
0
from parser.lc_quad_linked import LC_Qaud_Linked
import json
from kb.dbpedia import DBpedia
from parser.answerparser import AnswerParser
from common.utility.stats import Stats
from common.container.answerset import AnswerSet
from tqdm import tqdm

stats = Stats()

with open("../output/nliwod_origin.json") as data_file:
    nliwod = json.load(data_file)

ds = LC_Qaud_Linked(path="../data/LC-QUAD/linked_answer6.json")
ds.load()
ds.parse()

kb = DBpedia()
parser = AnswerParser(kb)
i = 0
for qapair in tqdm(ds.qapairs):
    nliwod_row = nliwod[i]
    if qapair.id == nliwod_row["q_id"]:
        query = nliwod_row["candidate"]
        raw_result = kb.query(query)
        result = AnswerSet(raw_result[1], parser.parse_queryresult)
        nliwod_row["answer"] = raw_result[1]
        if qapair.answerset == result:
            stats.inc("correct")
            nliwod_row["correct"] = True
        else: