Exemple #1
0
def get_evaluated_queries(dataset, cached, parameters, n_top=2000):
    """Return the evaluated queries of a dataset, using the cache if asked.

    When ``cached`` is truthy, a cached evaluation for the given dataset and
    parameters is returned if the cache lookup yields a non-empty result.
    Otherwise the queries are loaded and evaluated, and (again only when
    ``cached`` is truthy) the fresh result is stored in the cache.

    :param dataset: Passed to ``load_eval_queries`` and the cache helpers.
    :param cached: Whether to read from and write to the evaluation cache.
    :param parameters: Assigned to the scorer's ``parameters`` attribute and
        passed to the cache helpers.
    :param n_top: Forwarded to ``evaluate_translator`` as ``n_top``.
    :rtype: list[EvaluationQuery]
    :return: The evaluated queries.
    """
    if cached:
        from_cache = get_cached_evaluated_queries(dataset, parameters)
        if from_cache:
            return from_cache

    # The default scorer is used on purpose, but it carries the parameters
    # of the scorer that was actually selected.
    translator = QueryTranslator.init_from_config()
    default_scorer = ranker.LiteralRanker('DefaultScorer')
    default_scorer.parameters = parameters
    translator.set_scorer(default_scorer)

    # Evaluating up front lets subsequent (cached) runs reuse the knowledge
    # of which candidate is correct instead of recomputing it.
    _, evaluated = evaluate_translator(translator,
                                       load_eval_queries(dataset),
                                       n_top=n_top,
                                       ignore_invalid=False,
                                       output_result=False)
    if cached:
        cache_evaluated_queries(dataset, evaluated, parameters)
    return evaluated
Exemple #2
0
def get_evaluated_queries(dataset, cached, parameters, n_top,
                          prune_for_training):
    """Return the evaluated queries of a dataset, using the cache if asked.

    When ``cached`` is truthy, a cached evaluation for the given dataset and
    parameters is returned if the cache lookup yields a non-empty result.
    Otherwise the queries are loaded and evaluated, and (again only when
    ``cached`` is truthy) the fresh result is stored in the cache.

    :param dataset: Passed to ``load_eval_queries`` and the cache helpers.
    :param cached: Whether to read from and write to the evaluation cache.
    :param parameters: Assigned to the ranker's ``parameters`` attribute and
        passed to the cache helpers.
    :param n_top: Forwarded to ``evaluate_translator`` as ``n_top``.
    :param prune_for_training: Forwarded to ``evaluate_translator`` as
        ``prune_for_training``.
    :rtype: list[EvaluationQuery]
    :return: The evaluated queries.
    """
    if cached:
        from_cache = get_cached_evaluated_queries(dataset, parameters)
        if from_cache:
            return from_cache

    # The default scorer is used on purpose, but it carries the parameters
    # of the scorer that was actually selected.
    translator = QueryTranslator.init_from_config()
    default_ranker = ranker.LiteralRanker('DefaultScorer')
    default_ranker.parameters = parameters
    translator.set_ranker(default_ranker)

    # Evaluating up front lets subsequent (cached) runs reuse the knowledge
    # of which candidate is correct instead of recomputing it.
    _, evaluated = evaluate_translator(
        translator,
        load_eval_queries(dataset),
        n_top=n_top,
        ignore_invalid=False,
        output_result=False,
        prune_for_training=prune_for_training)
    if cached:
        cache_evaluated_queries(dataset, evaluated, parameters)
    return evaluated
Exemple #3
0
def main():
    """Run an interactive console that translates questions.

    Reads the configuration file, selects the ranker named on the command
    line, and then repeatedly prompts for a question on stdin. For every
    question all candidates are printed (simple graph, result, pattern, name
    of the last relation and name of that relation's source entity), followed
    by a log of the SPARQL query and result rows of the top candidate.

    The process exits with status 1 if the ranker name is unknown, and the
    prompt loop ends when stdin reaches end of input.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Console based translation.")
    # nargs="?" is what makes argparse actually apply the default to a
    # positional argument; without it the argument is mandatory and the
    # default is never used.
    parser.add_argument("ranker_name",
                        nargs="?",
                        default="WQ_Ranker",
                        help="The ranker to use.")
    parser.add_argument("--config",
                        default="config.cfg",
                        help="The configuration file to use.")
    args = parser.parse_args()
    globals.read_configuration(args.config)
    if args.ranker_name not in scorer_globals.scorers_dict:
        logger.error("%s is not a valid ranker", args.ranker_name)
        logger.error("Valid rankers are: %s ",
                     " ".join(scorer_globals.scorers_dict.keys()))
        # Continuing would only fail with a KeyError on the lookup below.
        sys.exit(1)
    logger.info("Using ranker %s", args.ranker_name)
    ranker = scorer_globals.scorers_dict[args.ranker_name]
    translator = QueryTranslator.init_from_config()
    translator.set_scorer(ranker)
    while True:
        sys.stdout.write("enter question> ")
        sys.stdout.flush()
        line = sys.stdin.readline()
        if not line:
            # readline() returns an empty string only at end of input;
            # without this check the loop would spin forever on EOF.
            break
        query = line.strip()
        logger.info("Translating query: %s", query)
        results = translator.translate_and_execute_query(query)
        logger.info("Done translating query: %s", query)
        logger.info("#candidates: %s", len(results))
        if not results:
            continue
        best_candidate = results[0].query_candidate

        # Single-argument parenthesised prints behave the same under
        # Python 2 (print statement) and Python 3 (print function).
        for result in results:
            candidate = result.query_candidate
            relation = candidate.relations[-1]
            print(candidate.graph_as_simple_string())
            print(candidate.get_result(include_name=True))
            print(candidate.pattern)
            print(relation.name)
            print(relation.source_node.entity.entity.name)
            print("")

        sparql_query = best_candidate.to_sparql_query()
        result_rows = results[0].query_result_rows
        # Usually we get a name + mid.
        answer = ["%s (%s)" % (r[1], r[0]) if len(r) > 1 else "%s" % r[0]
                  for r in result_rows]
        logger.info("SPARQL query: %s", sparql_query)
        logger.info("Result: %s ", " ".join(answer))
def main():
    """Run an interactive console that translates questions.

    Reads the configuration file, selects the ranker named on the command
    line, and then repeatedly prompts for a question on stdin. For every
    question the SPARQL query and the result rows of the top candidate are
    logged.

    The process exits with status 1 if the ranker name is unknown, and the
    prompt loop ends when stdin reaches end of input.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Console based translation.")
    # nargs="?" is what makes argparse actually apply the default to a
    # positional argument; without it the argument is mandatory and the
    # default is never used.
    parser.add_argument("ranker_name",
                        nargs="?",
                        default="WQ_Ranker",
                        help="The ranker to use.")
    parser.add_argument("--config",
                        default="config.cfg",
                        help="The configuration file to use.")
    args = parser.parse_args()
    globals.read_configuration(args.config)
    if args.ranker_name not in scorer_globals.scorers_dict:
        logger.error("%s is not a valid ranker", args.ranker_name)
        logger.error("Valid rankers are: %s ",
                     " ".join(scorer_globals.scorers_dict.keys()))
        # Continuing would only fail with a KeyError on the lookup below.
        sys.exit(1)
    logger.info("Using ranker %s", args.ranker_name)
    ranker = scorer_globals.scorers_dict[args.ranker_name]
    translator = QueryTranslator.init_from_config()
    translator.set_scorer(ranker)
    while True:
        sys.stdout.write("enter question> ")
        sys.stdout.flush()
        line = sys.stdin.readline()
        if not line:
            # readline() returns an empty string only at end of input;
            # without this check the loop would spin forever on EOF.
            break
        query = line.strip()
        logger.info("Translating query: %s", query)
        results = translator.translate_and_execute_query(query)
        logger.info("Done translating query: %s", query)
        logger.info("#candidates: %s", len(results))
        if not results:
            continue
        best_candidate = results[0].query_candidate
        sparql_query = best_candidate.to_sparql_query()
        result_rows = results[0].query_result_rows
        # Usually we get a name + mid.
        answer = ["%s (%s)" % (r[1], r[0]) if len(r) > 1 else "%s" % r[0]
                  for r in result_rows]
        logger.info("SPARQL query: %s", sparql_query)
        logger.info("Result: %s ", " ".join(answer))
Exemple #5
0
def main() -> None:
    """
    Entry point into the program

    Parses the command line, reads the configuration, instantiates the
    selected ranker (with optional JSON overrides) and registers three GET
    routes on APP before starting it:

    * ``/`` translates the query given in the ``q`` argument,
    * ``/lookupid`` maps the entity for the mid given in ``id``,
    * ``/config`` reports the ranker name, overrides and configuration.

    Exits with status 1 if the ranker name is unknown or ``--override`` is
    not valid JSON.
    """
    import argparse
    parser = argparse.ArgumentParser(description="REST api based translation.")
    # nargs="?" is what makes argparse actually apply the default to a
    # positional argument; without it the argument is mandatory and the
    # default is never used.
    parser.add_argument("ranker_name",
                        nargs="?",
                        default="WQ_Ranker",
                        help="The ranker to use.")
    parser.add_argument('--override',
                        default='{}',
                        help='Override parameters of the ranker with JSON map')
    parser.add_argument("--config",
                        default="config.cfg",
                        help="The configuration file to use.")
    parser.add_argument("--port",
                        type=int,
                        default=8090,
                        help="The TCP port to use")
    args = parser.parse_args()
    config_helper.read_configuration(args.config)
    if args.ranker_name not in scorer_globals.scorers_dict:
        LOG.error("%s is not a valid ranker", args.ranker_name)
        LOG.error("Valid rankers are: %s ",
                  " ".join(scorer_globals.scorers_dict.keys()))
        sys.exit(1)
    LOG.info("Using ranker %s", args.ranker_name)
    try:
        override = json.loads(args.override)
    except ValueError as err:
        # json.JSONDecodeError is a ValueError subclass; report the bad
        # command-line value instead of dying with a traceback.
        LOG.error("--override is not valid JSON: %s", err)
        sys.exit(1)
    if override != {}:
        LOG.info('overrides: %s', json.dumps(override))
    ranker_conf = scorer_globals.scorers_dict[args.ranker_name]
    ranker = ranker_conf.instance(override)
    translator = QueryTranslator.init_from_config()
    translator.set_ranker(ranker)

    # using closures prevents us from having to make translator global
    @APP.route('/', methods=['GET'])
    def translate():  # pylint: disable=unused-variable
        """
        REST entry point providing a very simple query interface
        """
        raw_query = flask.request.args.get('q', "")
        LOG.info("Translating query: %s", raw_query)
        parsed_query, candidates = translator.translate_and_execute_query(
            raw_query)
        LOG.info("Done translating query: %s", raw_query)
        LOG.info("#candidates: %s", len(candidates))
        return flask.jsonify(
            map_candidates(raw_query, parsed_query, candidates))

    @APP.route('/lookupid', methods=['GET'])
    def lookupid():
        """
        REST entry point for looking up a human readable name for an entity
        """
        mid = flask.request.args.get('id', "")
        entity = translator.entity_index.get_entity_for_mid(mid)
        # A falsy lookup result is reported as a JSON null.
        result = None
        if entity:
            result = map_entity(entity)
        return flask.jsonify(result)

    @APP.route('/config', methods=['GET'])
    def get_config():
        """
        REST entry point providing information about the current configuration
        """
        result = {
            'ranker_name': ranker_conf.name,
            'override': ranker_conf.override(),
            'config': ranker_conf.config()
        }
        return flask.jsonify(result)

    APP.run(use_reloader=False,
            host='0.0.0.0',
            threaded=False,
            port=args.port,
            debug=False)
def main():
    """Run a debugging console for the query translator.

    Reads the configuration file and selects the ranker named on the command
    line. It then calls ``writeFile(test_file, "", "w")`` (presumably to reset
    the debug output file -- confirm against ``writeFile``), prints the
    entities and scores the surface index returns for "spanish", and finally
    prompts for questions on stdin, logging the SPARQL query and the result
    rows of the top candidate for each.

    Two larger debugging routines are kept below as inert string literals;
    they are never executed.

    The process exits with status 1 if the ranker name is unknown, and the
    prompt loop ends when stdin reaches end of input.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Console based translation.")
    # nargs="?" is what makes argparse actually apply the default to a
    # positional argument; without it the argument is mandatory and the
    # default is never used.
    parser.add_argument("ranker_name",
                        nargs="?",
                        default="WQ_Ranker",
                        help="The ranker to use.")
    parser.add_argument("--config",
                        default="config.cfg",
                        help="The configuration file to use.")
    args = parser.parse_args()
    globals.read_configuration(args.config)
    if args.ranker_name not in scorer_globals.scorers_dict:
        logger.error("%s is not a valid ranker", args.ranker_name)
        logger.error("Valid rankers are: %s ",
                     " ".join(scorer_globals.scorers_dict.keys()))
        # Continuing would only fail with a KeyError on the lookup below.
        sys.exit(1)
    logger.info("Using ranker %s", args.ranker_name)
    ranker = scorer_globals.scorers_dict[args.ranker_name]
    translator = QueryTranslator.init_from_config()
    translator.set_scorer(ranker)

    writeFile(test_file, "", "w")

    linker = translator.entity_linker
    entities = linker.surface_index.get_entities_for_surface("spanish")
    for (e, score) in entities:
        # Formatting into one string keeps the "name score" output while
        # being valid under both Python 2 and Python 3.
        print("%s %s" % (e.name, score))

    # Disabled debugging routine, kept as a no-op string literal.
    """
    for i in xrange(len(rank_error)):
        query = rank_error[i]
        results = translator.translate_and_execute_query(query)
        if (len(results) > 0):
            correct = results[rank_pos[i]].query_candidate

            candidate = results[0].query_candidate
            sparql_query = candidate.to_sparql_query()
            correct_query = correct.to_sparql_query()

            result_rows = results[0].query_result_rows
            result = []
            for r in result_rows:
                if len(r) > 1:
                    result.append("%s (%s)" % (r[1], r[0]))
                else:
                    result.append("%s" % r[0])
            correct_result_rows = results[rank_pos[i]].query_result_rows
            correct_result = []
            for r in correct_result_rows:
                if len(r) > 1:
                    correct_result.append("%s (%s)" % (r[1], r[0]))
                else:
                    correct_result.append("%s" % r[0])

            extractor = FeatureExtractor(True, False, None)
            features = extractor.extract_features(candidate)
            y_features = extractor.extract_features(correct)
            diff = feature_diff(features, y_features)

            X = ranker.dict_vec.transform(diff)
            if ranker.scaler:
                X = ranker.scaler.transform(X)
            ranker.model.n_jobs = 1
            p = ranker.model.predict(X)
            c = ranker.label_encoder.inverse_transform(p)
            res = c[0]

            root_name = "Root Node: %s\n" % (candidate.root_node.entity.name.encode('utf-8'))
            query_str = "SPARQL query: %s\n" % (sparql_query.encode('utf-8'))
            graph_str = "Candidate Graph: %s\n" % (candidate.graph_as_string().encode('utf-8'))
            graph_str_simple = "Simple Candidate Graph: %s" % (candidate.graph_as_simple_string().encode('utf-8'))
            y_graph_str_simple = "Answer Candidate Graph: %s" % (correct.graph_as_simple_string().encode('utf-8'))
            result_str = "Result: %s\n" % ((" ".join(result)).encode('utf-8'))
            correct_result_str = "Correct Result: %s\n" % ((" ".join(correct_result)).encode('utf-8'))

            feature_str = "Result Features: %s\n" % (str(features).encode('utf-8'))
            y_feature_str = "Answer Features: %s\n" %(str(y_features).encode('utf-8'))
            diff_str = "Feature Diff: %s\n" %(str(diff).encode('utf-8'))

            x_str = "X vector: %s\n" % (str(X).encode('utf-8'))
            p_str = "Predict vector: %s\n" % (str(p).encode('utf-8'))
            c_str = "C vector: %s\n" % (str(c).encode('utf-8'))
            cmp_res = "Compare result: %d\n" % (res)

            writeFile(test_file, root_name, "a")
            writeFile(test_file, result_str, "a")
            writeFile(test_file, correct_result_str, "a")

            writeFile(test_file, graph_str_simple, "a")
            writeFile(test_file, y_graph_str_simple, "a")

            writeFile(test_file, feature_str, "a")
            writeFile(test_file, y_feature_str, "a")
            writeFile(test_file, diff_str, "a")

            writeFile(test_file, x_str, "a")
            writeFile(test_file, p_str, "a")
            writeFile(test_file, c_str, "a")
            writeFile(test_file, cmp_res, "a")
        writeFile(test_file, "\n", "a")
    """

    # Second disabled debugging routine, also a no-op string literal.
    """
    for query in test_set + unidentified:
        results = translator.translate_and_execute_query(query)
        if (len(results) > 0):
            for i in xrange(len(results)):
                if (i > 10):
                    break
                candidate = results[i].query_candidate
                sparql_query = candidate.to_sparql_query()
                result_rows = results[i].query_result_rows
                result = []
                for r in result_rows:
                    if len(r) > 1:
                        result.append("%s (%s)" % (r[1], r[0]))
                    else:
                        result.append("%s" % r[0])

                extractor = FeatureExtractor(True, False, None)
                features = extractor.extract_features(candidate)

                root_name = "%d Root Node: %s\n" % (i+1, candidate.root_node.entity.name.encode('utf-8'))
                query_str = "%d SPARQL query: %s\n" % (i+1, sparql_query.encode('utf-8'))
                graph_str = "%d Candidate Graph: %s\n" % (i+1, candidate.graph_as_string().encode('utf-8'))
                graph_str_simple = "%d Simple Candidate Graph: %s" % (i+1, candidate.graph_as_simple_string().encode('utf-8'))
                result_str = "%d Result: %s\n" % (i+1, (" ".join(result)).encode('utf-8'))
                feature_str = "%d Features: %s\n" % (i+1, str(features).encode('utf-8'))
                writeFile(test_file, root_name, "a")
                #writeFile(test_file, graph_str, "a")
                writeFile(test_file, graph_str_simple, "a")
                writeFile(test_file, feature_str, "a")
                #writeFile(test_file, query_str, "a")
                writeFile(test_file, result_str, "a")
        writeFile(test_file, "\n", "a")
    """

    while True:
        sys.stdout.write("enter question> ")
        sys.stdout.flush()
        line = sys.stdin.readline()
        if not line:
            # readline() returns an empty string only at end of input;
            # without this check the loop would spin forever on EOF.
            break
        query = line.strip()
        logger.info("Translating query: %s", query)
        results = translator.translate_and_execute_query(query)
        logger.info("Done translating query: %s", query)
        logger.info("#candidates: %s", len(results))
        if not results:
            continue
        best_candidate = results[0].query_candidate
        sparql_query = best_candidate.to_sparql_query()
        result_rows = results[0].query_result_rows
        # Usually we get a name + mid.
        answer = ["%s (%s)" % (r[1], r[0]) if len(r) > 1 else "%s" % r[0]
                  for r in result_rows]
        logger.info("SPARQL query: %s", sparql_query)
        logger.info("Result: %s ", " ".join(answer))