def get_evaluated_queries(dataset, cached, parameters, n_top=2000):
    """Return the evaluation queries of a dataset in evaluated form.

    With ``cached`` set, a stored evaluation for this ``dataset`` /
    ``parameters`` combination is looked up first and returned if it is
    non-empty. Otherwise the queries are loaded and evaluated with a
    ``'DefaultScorer'`` literal ranker that carries ``parameters``, and the
    outcome is written to the cache (again only when ``cached`` is set).

    :rtype list[EvaluationQuery]
    :param dataset: dataset identifier passed to the load and cache helpers.
    :param cached: whether the evaluation cache is read and written.
    :param parameters: parameters assigned to the candidate scorer; also
        passed to the cache helpers.
    :param n_top: forwarded to ``evaluate_translator`` as ``n_top``.
    :return: the evaluated queries.
    """
    if cached:
        from_cache = get_cached_evaluated_queries(dataset, parameters)
        if from_cache:
            return from_cache

    # The default scorer is used on purpose, but it is configured with the
    # parameters of the scorer that was actually selected.
    default_scorer = ranker.LiteralRanker('DefaultScorer')
    default_scorer.parameters = parameters
    translator = QueryTranslator.init_from_config()
    translator.set_scorer(default_scorer)

    # Evaluating once here means later runs already know which candidate is
    # correct and need not repeat the same calculations.
    _, evaluated = evaluate_translator(
        translator,
        load_eval_queries(dataset),
        n_top=n_top,
        ignore_invalid=False,
        output_result=False,
    )
    if cached:
        cache_evaluated_queries(dataset, evaluated, parameters)
    return evaluated
def get_evaluated_queries(dataset, cached, parameters, n_top,
                          prune_for_training):
    """Return the evaluation queries of a dataset in evaluated form.

    With ``cached`` set, a stored evaluation for this ``dataset`` /
    ``parameters`` combination is looked up first and returned if it is
    non-empty. Otherwise the queries are loaded and evaluated with a
    ``'DefaultScorer'`` literal ranker that carries ``parameters``, and the
    outcome is written to the cache (again only when ``cached`` is set).

    :rtype list[EvaluationQuery]
    :param dataset: dataset identifier passed to the load and cache helpers.
    :param cached: whether the evaluation cache is read and written.
    :param parameters: parameters assigned to the candidate ranker; also
        passed to the cache helpers.
    :param n_top: forwarded to ``evaluate_translator`` as ``n_top``.
    :param prune_for_training: forwarded to ``evaluate_translator`` as
        ``prune_for_training``.
    :return: the evaluated queries.
    """
    if cached:
        from_cache = get_cached_evaluated_queries(dataset, parameters)
        if from_cache:
            return from_cache

    # The default scorer is used on purpose, but it is configured with the
    # parameters of the scorer that was actually selected.
    default_ranker = ranker.LiteralRanker('DefaultScorer')
    default_ranker.parameters = parameters
    translator = QueryTranslator.init_from_config()
    translator.set_ranker(default_ranker)

    # Evaluating once here means later runs already know which candidate is
    # correct and need not repeat the same calculations.
    _, evaluated = evaluate_translator(
        translator,
        load_eval_queries(dataset),
        n_top=n_top,
        ignore_invalid=False,
        output_result=False,
        prune_for_training=prune_for_training,
    )
    if cached:
        cache_evaluated_queries(dataset, evaluated, parameters)
    return evaluated
def main():
    """Run an interactive console loop that translates questions.

    Parses the command line (ranker name and configuration file), reads the
    configuration, installs the selected ranker on a ``QueryTranslator`` and
    then repeatedly reads one question per line from stdin. For every
    question, details of each returned candidate are printed to stdout, and
    the SPARQL query and result rows of the first result (treated as the
    best candidate) are logged.

    Exits with status 1 if the ranker name is not in
    ``scorer_globals.scorers_dict``. The loop ends when stdin reaches
    end-of-file.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Console based translation.")
    # nargs="?" makes the positional optional; without it argparse treats
    # the argument as required and never applies the declared default.
    parser.add_argument("ranker_name",
                        nargs="?",
                        default="WQ_Ranker",
                        help="The ranker to use.")
    parser.add_argument("--config",
                        default="config.cfg",
                        help="The configuration file to use.")
    args = parser.parse_args()
    globals.read_configuration(args.config)
    if args.ranker_name not in scorer_globals.scorers_dict:
        logger.error("%s is not a valid ranker", args.ranker_name)
        logger.error("Valid rankers are: %s ",
                     " ".join(scorer_globals.scorers_dict.keys()))
        # Stop here instead of failing with a KeyError on the lookup below.
        sys.exit(1)
    logger.info("Using ranker %s", args.ranker_name)
    ranker = scorer_globals.scorers_dict[args.ranker_name]
    translator = QueryTranslator.init_from_config()
    translator.set_scorer(ranker)
    while True:
        sys.stdout.write("enter question> ")
        sys.stdout.flush()
        line = sys.stdin.readline()
        if not line:
            # readline() returns an empty string only at end-of-file;
            # without this check the loop would spin on empty queries.
            break
        query = line.strip()
        logger.info("Translating query: %s", query)
        results = translator.translate_and_execute_query(query)
        logger.info("Done translating query: %s", query)
        logger.info("#candidates: %s", len(results))
        if not results:
            continue
        for result in results:
            candidate = result.query_candidate
            relation = candidate.relations[-1]
            # Single-argument print calls behave the same on Python 2 and 3.
            print(candidate.graph_as_simple_string())
            print(candidate.get_result(include_name=True))
            print(candidate.pattern)
            print(relation.name)
            print(relation.source_node.entity.entity.name)
            print("")
        best_result = results[0]
        sparql_query = best_result.query_candidate.to_sparql_query()
        # Usually a row holds a mid followed by a name; fall back to the
        # first column alone when the row has a single entry.
        answers = [
            "%s (%s)" % (row[1], row[0]) if len(row) > 1 else "%s" % row[0]
            for row in best_result.query_result_rows
        ]
        logger.info("SPARQL query: %s", sparql_query)
        logger.info("Result: %s ", " ".join(answers))
def main():
    """Run an interactive console loop that translates questions.

    Parses the command line (ranker name and configuration file), reads the
    configuration, installs the selected ranker on a ``QueryTranslator`` and
    then repeatedly reads one question per line from stdin. For every
    question that yields results, the SPARQL query and result rows of the
    first result (treated as the best candidate) are logged.

    Exits with status 1 if the ranker name is not in
    ``scorer_globals.scorers_dict``. The loop ends when stdin reaches
    end-of-file.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Console based translation.")
    # nargs="?" makes the positional optional; without it argparse treats
    # the argument as required and never applies the declared default.
    parser.add_argument("ranker_name",
                        nargs="?",
                        default="WQ_Ranker",
                        help="The ranker to use.")
    parser.add_argument("--config",
                        default="config.cfg",
                        help="The configuration file to use.")
    args = parser.parse_args()
    globals.read_configuration(args.config)
    if args.ranker_name not in scorer_globals.scorers_dict:
        logger.error("%s is not a valid ranker", args.ranker_name)
        logger.error("Valid rankers are: %s ",
                     " ".join(scorer_globals.scorers_dict.keys()))
        # Stop here instead of failing with a KeyError on the lookup below.
        sys.exit(1)
    logger.info("Using ranker %s", args.ranker_name)
    ranker = scorer_globals.scorers_dict[args.ranker_name]
    translator = QueryTranslator.init_from_config()
    translator.set_scorer(ranker)
    while True:
        sys.stdout.write("enter question> ")
        sys.stdout.flush()
        line = sys.stdin.readline()
        if not line:
            # readline() returns an empty string only at end-of-file;
            # without this check the loop would spin on empty queries.
            break
        query = line.strip()
        logger.info("Translating query: %s", query)
        results = translator.translate_and_execute_query(query)
        logger.info("Done translating query: %s", query)
        logger.info("#candidates: %s", len(results))
        if not results:
            continue
        best_result = results[0]
        sparql_query = best_result.query_candidate.to_sparql_query()
        # Usually a row holds a mid followed by a name; fall back to the
        # first column alone when the row has a single entry.
        answers = [
            "%s (%s)" % (row[1], row[0]) if len(row) > 1 else "%s" % row[0]
            for row in best_result.query_result_rows
        ]
        logger.info("SPARQL query: %s", sparql_query)
        logger.info("Result: %s ", " ".join(answers))
def main() -> None:
    """Entry point: serve the query translator through a small REST API.

    Parses the command line, reads the configuration, builds the selected
    ranker (optionally with JSON-encoded parameter overrides) and installs
    it on a ``QueryTranslator``. Three GET routes are then registered on
    ``APP`` and the Flask server is started on host ``0.0.0.0`` at the
    requested port:

    * ``/``         -- translate the query given in parameter ``q``.
    * ``/lookupid`` -- look up the entity for the mid given in ``id``.
    * ``/config``   -- report the ranker name, override and config.

    Exits with status 1 if the ranker name is not in
    ``scorer_globals.scorers_dict`` or if ``--override`` is not valid JSON.
    """
    import argparse
    parser = argparse.ArgumentParser(description="REST api based translation.")
    # nargs="?" makes the positional optional; without it argparse treats
    # the argument as required and never applies the declared default.
    parser.add_argument("ranker_name",
                        nargs="?",
                        default="WQ_Ranker",
                        help="The ranker to use.")
    parser.add_argument('--override',
                        default='{}',
                        help='Override parameters of the ranker with JSON map')
    parser.add_argument("--config",
                        default="config.cfg",
                        help="The configuration file to use.")
    parser.add_argument("--port",
                        type=int,
                        default=8090,
                        help="The TCP port to use")
    args = parser.parse_args()
    config_helper.read_configuration(args.config)
    if args.ranker_name not in scorer_globals.scorers_dict:
        LOG.error("%s is not a valid ranker", args.ranker_name)
        LOG.error("Valid rankers are: %s ",
                  " ".join(list(scorer_globals.scorers_dict.keys())))
        sys.exit(1)
    LOG.info("Using ranker %s", args.ranker_name)

    try:
        override = json.loads(args.override)
    except ValueError as err:
        # json.JSONDecodeError is a ValueError subclass; report malformed
        # input the same way as an invalid ranker instead of a traceback.
        LOG.error("--override is not valid JSON: %s", err)
        sys.exit(1)
    if override != {}:
        LOG.info('overrides: %s', json.dumps(override))
    ranker_conf = scorer_globals.scorers_dict[args.ranker_name]
    ranker = ranker_conf.instance(override)

    translator = QueryTranslator.init_from_config()
    translator.set_ranker(ranker)

    # using closures prevents us from having to make translator global
    @APP.route('/', methods=['GET'])
    def translate():  # pylint: disable=unused-variable
        """
        REST entry point providing a very simple query interface
        """
        raw_query = flask.request.args.get('q', "")
        LOG.info("Translating query: %s", raw_query)
        parsed_query, candidates = translator.translate_and_execute_query(
            raw_query)
        LOG.info("Done translating query: %s", raw_query)
        LOG.info("#candidates: %s", len(candidates))
        return flask.jsonify(
            map_candidates(raw_query, parsed_query, candidates))

    @APP.route('/lookupid', methods=['GET'])
    def lookupid():  # pylint: disable=unused-variable
        """
        REST entry point for looking up a human readable name for an entity
        """
        mid = flask.request.args.get('id', "")
        entity = translator.entity_index.get_entity_for_mid(mid)
        # result stays None when the index returns no entity for the mid
        result = None
        if entity:
            result = map_entity(entity)
        return flask.jsonify(result)

    @APP.route('/config', methods=['GET'])
    def get_config():  # pylint: disable=unused-variable
        """
        REST entry point providing information about the current
        configuration
        """
        result = {
            'ranker_name': ranker_conf.name,
            'override': ranker_conf.override(),
            'config': ranker_conf.config()
        }
        return flask.jsonify(result)

    APP.run(use_reloader=False,
            host='0.0.0.0',
            threaded=False,
            port=args.port,
            debug=False)
def main():
    """Run an interactive console loop that translates questions.

    Parses the command line (ranker name and configuration file), reads the
    configuration and installs the selected ranker on a ``QueryTranslator``.
    Before the loop starts it calls ``writeFile(test_file, "", "w")``
    (presumably to reset the output file -- confirm against ``writeFile``)
    and prints every entity, with its score, that the entity linker's
    surface index returns for the surface form ``"spanish"``.

    The loop then reads one question per line from stdin. For every question
    that yields results, the SPARQL query and result rows of the first
    result (treated as the best candidate) are logged.

    Exits with status 1 if the ranker name is not in
    ``scorer_globals.scorers_dict``. The loop ends when stdin reaches
    end-of-file.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Console based translation.")
    # nargs="?" makes the positional optional; without it argparse treats
    # the argument as required and never applies the declared default.
    parser.add_argument("ranker_name",
                        nargs="?",
                        default="WQ_Ranker",
                        help="The ranker to use.")
    parser.add_argument("--config",
                        default="config.cfg",
                        help="The configuration file to use.")
    args = parser.parse_args()
    globals.read_configuration(args.config)
    if args.ranker_name not in scorer_globals.scorers_dict:
        logger.error("%s is not a valid ranker", args.ranker_name)
        logger.error("Valid rankers are: %s ",
                     " ".join(scorer_globals.scorers_dict.keys()))
        # Stop here instead of failing with a KeyError on the lookup below.
        sys.exit(1)
    logger.info("Using ranker %s", args.ranker_name)
    ranker = scorer_globals.scorers_dict[args.ranker_name]
    translator = QueryTranslator.init_from_config()
    translator.set_scorer(ranker)

    writeFile(test_file, "", "w")

    linker = translator.entity_linker
    entities = linker.surface_index.get_entities_for_surface("spanish")
    for entity, score in entities:
        # One pre-formatted argument prints "name score" on Python 2 and 3.
        print("%s %s" % (entity.name, score))

    while True:
        sys.stdout.write("enter question> ")
        sys.stdout.flush()
        line = sys.stdin.readline()
        if not line:
            # readline() returns an empty string only at end-of-file;
            # without this check the loop would spin on empty queries.
            break
        query = line.strip()
        logger.info("Translating query: %s", query)
        results = translator.translate_and_execute_query(query)
        logger.info("Done translating query: %s", query)
        logger.info("#candidates: %s", len(results))
        if not results:
            continue
        best_result = results[0]
        sparql_query = best_result.query_candidate.to_sparql_query()
        # Usually a row holds a mid followed by a name; fall back to the
        # first column alone when the row has a single entry.
        answers = [
            "%s (%s)" % (row[1], row[0]) if len(row) > 1 else "%s" % row[0]
            for row in best_result.query_result_rows
        ]
        logger.info("SPARQL query: %s", sparql_query)
        logger.info("Result: %s ", " ".join(answers))