def test_function_filter_iterator():
    """Filter a projected scan with an IRI equality combined with the
    isLiteral / isNumeric built-ins, and expect a single result."""
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projected_scan = ProjectionIterator(ScanIterator(source, triple, cardinality))
    filtered = FilterIterator(
        projected_scan,
        '?p = <http://purl.org/goodrelations/price> && isLiteral(?o) && !isNumeric(?o)')
    # no time limit: the plan is run with an infinite quota
    results, _saved, _done = engine.execute(filtered, math.inf)
    assert len(results) == 1
def test_filter_iterator_interrupt():
    """Run a filtered scan under a very small quota, reload the saved plan
    and finish it, checking that both runs together yield the 4 expected
    bindings."""
    expected_predicate = 'http://schema.org/eligibleRegion'
    expected_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]

    def check_bindings(bindings):
        # every binding must carry the filtered predicate and a known object
        for binding in bindings:
            assert binding['?p'] == expected_predicate
            assert binding['?o'] in expected_objects

    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projected_scan = ProjectionIterator(ScanIterator(source, triple, cardinality))
    filtered = FilterIterator(projected_scan, "?p = <http://schema.org/eligibleRegion>")

    # first run: tiny quota, so execution may stop before all results are read
    results, saved, done = engine.execute(filtered, 10e-7)
    assert len(results) <= 4
    check_bindings(results)
    first_run_count = len(results)

    # second run: rebuild the plan from its serialized state and resume it
    reloaded = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'))
    results, saved, done = engine.execute(reloaded, 10e7)
    assert len(results) + first_run_count == 4
    check_bindings(results)
    assert done
def test_operation_filter_iterator():
    """Filter a projected scan with the constant arithmetic expression
    "10 = 5 * 2" and expect nine results."""
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projected_scan = ProjectionIterator(ScanIterator(source, triple, cardinality))
    filtered = FilterIterator(projected_scan, "10 = 5 * 2")
    # no time limit: the plan is run with an infinite quota
    results, _saved, _done = engine.execute(filtered, math.inf)
    assert len(results) == 9
def test_projection_read_stopped():
    """Execute a projection on ?common under a short quota and check that
    each returned binding holds ?common but not ?s1."""
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projection = ProjectionIterator(
        ScanIterator(source, triple, cardinality), ['?common'])
    results, _saved, _done = engine.execute(projection, 10e-4)
    # the run may be interrupted, so only an upper bound on the count is checked
    assert len(results) <= cardinality
    for binding in results:
        assert '?common' in binding and '?s1' not in binding
def build_join_plan(bgp, db_connector, projection=None):
    """
        Build the join plan of a BGP and wrap it in a projection.

        Args:
            * bgp - Basic graph pattern forwarded to build_left_plan
            * db_connector - Connector forwarded to build_left_plan
            * projection - Values to project on. When None, the variables returned by build_left_plan are used

        Returns:
            A tuple (ProjectionIterator, cardinalities), where cardinalities is the value returned by build_left_plan
    """
    plan, plan_vars, cardinalities = build_left_plan(bgp, db_connector)
    # NOTE: OPTIONAL clauses are not handled here; the code that chained a
    # second left plan for them is currently disabled.
    if projection is None:
        projected_values = plan_vars
    else:
        projected_values = projection
    return ProjectionIterator(plan, projected_values), cardinalities
def test_and_or_filter_iterator():
    """Filter a projected scan with a conjunction whose right-hand side is a
    disjunction on ?o, and expect exactly the two matching bindings."""
    accepted_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projected_scan = ProjectionIterator(ScanIterator(source, triple, cardinality))
    filtered = FilterIterator(
        projected_scan,
        "?p = <http://schema.org/eligibleRegion> && (?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country0> || ?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country9>)")
    # no time limit: the plan is run with an infinite quota
    results, _saved, _done = engine.execute(filtered, math.inf)
    assert len(results) == 2
    for binding in results:
        assert binding['?p'] == 'http://schema.org/eligibleRegion'
        assert binding['?o'] in accepted_objects
def parse_query_node(node, dataset, current_graphs, server_url, cardinalities):
    """
        Recursively translate a node of the logical query plan into a
        physical query execution plan made of iterators.

        Args:
            * node - Node of the logical plan to parse (in rdflib format)
            * dataset - RDF dataset used to execute the query
            * current_graphs - List of IRI of the current RDF graph queried
            * server_url - URL of the SaGe server
            * cardinalities - Accumulator of triple pattern cardinalities, extended in place with the values returned by build_left_plan

        Returns:
            The iterator built for `node`.

        Raises:
            UnsupportedSPARQL: if `node.name` is not one of the handled node types
    """
    kind = node.name

    if kind == 'SelectQuery':
        # a FROM clause, when present, replaces the default graphs for the sub-plan
        if node.datasetClause is None:
            graphs = current_graphs
        else:
            graphs = [
                format_graph_uri(format_term(graph_iri.default), server_url)
                for graph_iri in node.datasetClause
            ]
        return parse_query_node(node.p, dataset, graphs, server_url, cardinalities)

    if kind == 'Project':
        projected_vars = ['?' + str(var) for var in node._vars]
        child = parse_query_node(node.p, dataset, current_graphs, server_url, cardinalities)
        return ProjectionIterator(child, projected_vars)

    if kind == 'BGP':
        localized = list(localize_triple(node.triples, current_graphs))
        plan, _, triple_cardinalities = build_left_plan(localized, dataset, current_graphs)
        # track cardinalities of every triple pattern
        cardinalities += triple_cardinalities
        return plan

    if kind == 'Union':
        left_plan = parse_query_node(node.p1, dataset, current_graphs, server_url, cardinalities)
        right_plan = parse_query_node(node.p2, dataset, current_graphs, server_url, cardinalities)
        return BagUnionIterator(left_plan, right_plan)

    if kind == 'Filter':
        # the expression is parsed before the child plan is built
        expression = parse_filter_expr(node.expr)
        child = parse_query_node(node.p, dataset, current_graphs, server_url, cardinalities)
        return FilterIterator(child, expression)

    if kind == 'Join':
        # only allow for joining BGPs from different GRAPH clauses
        left_triples = fetch_graph_triples(node.p1, current_graphs, server_url)
        right_triples = fetch_graph_triples(node.p2, current_graphs, server_url)
        plan, _, triple_cardinalities = build_left_plan(
            left_triples + right_triples, dataset, current_graphs)
        # track cardinalities of every triple pattern
        cardinalities += triple_cardinalities
        return plan

    raise UnsupportedSPARQL("Unsupported SPARQL feature: {}".format(kind))
def build_join_plan(bgp, dataset, default_graph):
    """
        Build the left plan of a BGP and end it with a projection on the
        variables returned by build_left_plan.

        Args:
            * bgp - Basic graph pattern forwarded to build_left_plan
            * dataset - Dataset forwarded to build_left_plan
            * default_graph - Default graph forwarded to build_left_plan

        Returns:
            A tuple (ProjectionIterator, cardinalities), where cardinalities is the value returned by build_left_plan
    """
    plan, plan_vars, cardinalities = build_left_plan(bgp, dataset, default_graph)
    projected_plan = ProjectionIterator(plan, plan_vars)
    return projected_plan, cardinalities
def load_projection(saved_plan, dataset):
    """
        Rebuild a ProjectionIterator from its protobuf serialization.

        Args:
            * saved_plan - Saved projection; its 'source' oneof names the field holding the saved child iterator
            * dataset - Dataset forwarded to `load` when rebuilding the child iterator

        Returns:
            A ProjectionIterator over the reloaded source, projecting on the saved values (or None when no values were saved)
    """
    source_field = saved_plan.WhichOneof('source')
    source = load(getattr(saved_plan, source_field), dataset)
    values = saved_plan.values
    # an empty list of saved values is passed on as None
    if not len(values) > 0:
        values = None
    return ProjectionIterator(source, values)