def predict_target_with_query(
        sparql, query, source, timeout=TIMEOUT, limit=LIMIT):
    """Predict targets (with scores) for source using the given query.

    For example for pagerank_bidi:
    SELECT distinct(?target) ?score {
     { dbr:Circle ?p ?target .} UNION { ?target ?q dbr:Circle . }
     ?target dbo:wikiPageRank ?score
    }
    ORDER BY DESC(?score)
    LIMIT 100
    """
    # substitute the source node into the query template, then cap results
    q = '\n'.join([query % {'source': source.n3()}, 'LIMIT %d' % limit])
    _, q_res = gp_query._query(sparql, timeout, q)
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[]))
    score_var = Variable('score')
    target_scores = []
    for row in bindings:
        target_scores.append(
            (get_path(row, [TARGET_VAR]),
             float(get_path(row, [score_var]))))
    return target_scores
def predict_target_with_query(
        sparql, query, source, timeout=TIMEOUT, limit=LIMIT):
    """Predicts target with given query.

    For example for pagerank_bidi:
    SELECT distinct(?target) ?score {
     { dbr:Circle ?p ?target .} UNION { ?target ?q dbr:Circle . }
     ?target dbo:wikiPageRank ?score
    }
    ORDER BY DESC(?score)
    LIMIT 100

    :return: list of (target, score) pairs with score coerced to float.
    """
    q = query % {'source': source.n3()}
    q += '\nLIMIT %d' % limit
    t, q_res = gp_query._query(sparql, timeout, q)
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[]))
    target_scores = [
        (get_path(row, [TARGET_VAR]),
         # fix: coerce the rdflib Literal score to float (as the sibling
         # variant of this function does) so callers can sort / compare it
         float(get_path(row, [Variable('score')])))
        for row in bindings]
    return target_scores
def _q(sparql, timeout, query, selectors):
    """Run a query and return one list of selector values per result row."""
    _, q_res = gp_query._query(sparql, timeout, query)
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[]))
    rows = []
    for row in bindings:
        rows.append([get_path(row, [sel]) for sel in selectors])
    return rows
def _ask_chunk_result_extractor(q_res, _vars, _ret_val_mapping):
    """Mark every source-target pair matched by a result row as True."""
    chunk_res = {}
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[]))
    for row in bindings:
        # map the row's variable values back to the originating pairs
        row_key = tuple(get_path(row, [v]) for v in _vars)
        for stp in _ret_val_mapping[row_key]:
            chunk_res[stp] = True
    return chunk_res
def _predict_chunk_res(q_res, *_):
    """Group the predicted targets of a chunk result by their source."""
    chunk_res = defaultdict(list)
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[]))
    for row in bindings:
        chunk_res[get_path(row, [SOURCE_VAR])].append(
            get_path(row, [TARGET_VAR]))
    return chunk_res
def _q(sparql, timeout, query, selectors):
    """Execute query; extract the selector variable values for each row."""
    _, q_res = gp_query._query(sparql, timeout, query)
    res_bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[]))

    def _row_values(row):
        # pull each selected variable's value out of one result row
        return [get_path(row, [s]) for s in selectors]

    return [_row_values(row) for row in res_bindings]
def _predict_chunk_res(q_res, *_):
    """Collect, per source, the list of targets found in the chunk result."""
    rows = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[]))
    res = defaultdict(list)
    for binding in rows:
        source = get_path(binding, [SOURCE_VAR])
        target = get_path(binding, [TARGET_VAR])
        res[source].append(target)
    return res
def _ask_chunk_result_extractor(q_res, _vars, _ret_val_mapping):
    """Extract ASK-style results: every matched pair is recorded as True."""
    res_rows = get_path(q_res, ['results', 'bindings'], default=[])
    bindings = sparql_json_result_bindings_to_rdflib(res_rows)
    chunk_res = {}
    for row in bindings:
        key = tuple([get_path(row, [v]) for v in _vars])
        # all source-target pairs that produced this row count as matched
        chunk_res.update(dict.fromkeys(_ret_val_mapping[key], True))
    return chunk_res
def _var_subst_chunk_result_ext(q_res, _sel_var_and_vars, _, **kwds):
    """Count, per substituted value, how many matches were reported."""
    var, _vars = _sel_var_and_vars
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[]))
    chunk_res = Counter()
    for row in bindings:
        # missing COUNT defaults to '0' so int() never fails on absence
        chunk_res[get_path(row, [var])] += int(
            get_path(row, [COUNT_VAR], '0'))
    return chunk_res
def _combined_chunk_res(q_res, _vars, _ret_val_mapping):
    """Extract (ask, count) result pairs for each source-target pair."""
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[]))
    chunk_res = {}
    for row in bindings:
        row_key = tuple(get_path(row, [v]) for v in _vars)
        # absent ASK / COUNT bindings default to '0'
        ask_count = (
            int(get_path(row, [ASK_VAR], '0')),
            int(get_path(row, [COUNT_VAR], '0')),
        )
        for stp in _ret_val_mapping[row_key]:
            chunk_res[stp] = ask_count
    return chunk_res
def _var_subst_chunk_result_ext(q_res, _sel_var_and_vars, _, **kwds):
    """Accumulate match counts keyed by the selected variable's value."""
    sel_var, _vars = _sel_var_and_vars
    rows = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[]))
    counts = Counter()
    for row in rows:
        val = get_path(row, [sel_var])
        counts[val] += int(get_path(row, [COUNT_VAR], '0'))
    return counts
def _combined_chunk_res(q_res, _vars, _ret_val_mapping):
    """Combine ASK and COUNT results per matched source-target pair."""
    res_rows = get_path(q_res, ['results', 'bindings'], default=[])
    chunk_res = {}
    for row in sparql_json_result_bindings_to_rdflib(res_rows):
        key = tuple([get_path(row, [v]) for v in _vars])
        pair = (int(get_path(row, [ASK_VAR], '0')),
                int(get_path(row, [COUNT_VAR], '0')))
        chunk_res.update({stp: pair for stp in _ret_val_mapping[key]})
    return chunk_res
def predict_query(sparql, timeout, graph_pattern, source,
                  limit=config.PREDICTION_RESULT_LIMIT):
    """Performs a single query starting at ?SOURCE returning all ?TARGETs."""
    assert isinstance(graph_pattern, GraphPattern)
    assert isinstance(source, Identifier)
    _query_stats.predict_query_count += 1

    # a pattern without ?target can never predict anything
    if TARGET_VAR not in graph_pattern.vars_in_graph:
        logger.warning(
            'graph pattern without %s used for prediction:\n%r',
            TARGET_VAR.n3(), graph_pattern
        )
        return timeout, set()

    q = graph_pattern.to_sparql_select_query(
        projection=[TARGET_VAR],
        distinct=True,
        bind={SOURCE_VAR: source},
        limit=limit,
    )
    try:
        t, q_res = _query(sparql, timeout, q)
    except (SPARQLWrapperException, SAXParseException, URLError):
        logger.warning(
            'Exception occurred during prediction, assuming empty result...\n'
            'Query:\n%s\nException:', q,
            exc_info=1,  # appends exception to message
        )
        t, q_res = timeout, {}
    else:
        # query succeeded but may still have taken longer than desired
        if query_time_soft_exceeded(t, timeout):
            kind = 'hard' if query_time_hard_exceeded(t, timeout) else 'soft'
            logger.info(
                'prediction query exceeded %s timeout %s:\n%s', kind, t, q
            )

    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[])
    )
    targets = {get_path(row, [TARGET_VAR]) for row in bindings}
    return timeout, targets
def predict_query(sparql, timeout, graph_pattern, source,
                  limit=config.PREDICTION_RESULT_LIMIT):
    """Performs a single query starting at ?SOURCE returning all ?TARGETs.

    :return: (timeout, targets) where targets is a set of predicted target
        nodes; empty when the pattern lacks ?target or the query failed.
    """
    assert isinstance(graph_pattern, GraphPattern)
    assert isinstance(source, Identifier)
    _query_stats.predict_query_count += 1
    vars_in_graph = graph_pattern.vars_in_graph
    if TARGET_VAR not in vars_in_graph:
        logger.warning('graph pattern without %s used for prediction:\n%r',
                       TARGET_VAR.n3(), graph_pattern)
        # fix: return a set here (was []) so every code path yields the
        # same (timeout, set) shape as the success path below
        return timeout, set()
    q = graph_pattern.to_sparql_select_query(
        projection=[TARGET_VAR],
        distinct=True,
        bind={SOURCE_VAR: source},
        limit=limit,
    )
    try:
        t, q_res = _query(sparql, timeout, q)
    except (SPARQLWrapperException, SAXParseException, URLError):
        logger.warning(
            'Exception occurred during prediction, assuming empty result...\n'
            'Query:\n%s\nException:', q,
            exc_info=1,  # appends exception to message
        )
        t, q_res = timeout, {}
    else:
        if query_time_soft_exceeded(t, timeout):
            kind = 'hard' if query_time_hard_exceeded(t, timeout) else 'soft'
            logger.info('prediction query exceeded %s timeout %s:\n%s',
                        kind, t, q)
    res = []
    res_rows_path = ['results', 'bindings']
    bindings = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, res_rows_path, default=[]))
    for row in bindings:
        res.append(get_path(row, [TARGET_VAR]))
    return timeout, set(res)