async def test_nlj_interrupt():
    """An index join interrupted by a tiny max_results limit must not exceed the full result count.

    NOTE(review): a second ``test_nlj_interrupt`` exists later in this file and
    will shadow this one at import time — confirm which version is intended.
    """
    ctx = {'quantum': 10e7, 'max_results': 10e-5}
    outer = ScanIterator(hdtDoc, triple, ctx)
    inner = ScanIterator(hdtDoc, innerTriple, ctx)
    nlj = IndexJoinIterator(outer, inner, ctx)
    results, saved, done, _ = await engine.execute(nlj, ctx)
    assert len(results) <= 20
async def test_nlj_read():
    """A fully-executed index join yields exactly 20 complete solution mappings."""
    ctx = {'quantum': 10e7, 'max_results': 10e7}
    outer = ScanIterator(hdtDoc, triple, ctx)
    inner = ScanIterator(hdtDoc, innerTriple, ctx)
    nlj = IndexJoinIterator(outer, inner, ctx)
    results, saved, done, _ = await engine.execute(nlj, ctx)
    assert len(results) == 20
    # every solution must bind all three join variables
    for mappings in results:
        assert '?s1' in mappings and '?s2' in mappings and '?common' in mappings
    assert done
async def test_nlj_interrupt():
    """Interrupted nested-loop join (legacy search API): never more than 20 results."""
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    outer = ScanIterator(it, triple, card)
    nlj = IndexJoinIterator(outer, innerTriple, hdtDoc)
    results, saved, done, _ = await engine.execute(nlj, 10e-5)
    assert len(results) <= 20
async def test_scan_read():
    """A scan with a generous quantum reads at least one triple and terminates."""
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    scan = ScanIterator(it, triple, card)
    results, saved, done, _ = await engine.execute(scan, 10e7)
    assert len(results) > 0
    assert done
async def test_operation_filter_iterator():
    """A tautological arithmetic filter (10 = 5 * 2) lets all 9 scanned solutions through."""
    ctx = {'quantum': 10e7, 'max_results': 10e7}
    expression = "10 = 5 * 2"
    source = ProjectionIterator(ScanIterator(hdtDoc, triple, ctx), ctx)
    filtered = FilterIterator(source, expression, ctx)
    results, saved, done, _ = await engine.execute(filtered, ctx)
    assert len(results) == 9
async def test_filter_iterator_interrupt():
    """Filter evaluation survives preemption: the two batches sum to 4 results."""
    expected_regions = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]
    ctx = {'quantum': 10e-7, 'max_results': 10e7}
    expression = "?p = <http://schema.org/eligibleRegion>"
    source = ProjectionIterator(ScanIterator(hdtDoc, triple, ctx), ctx)
    filtered = FilterIterator(source, expression, ctx)
    # first run: tiny quantum forces an interruption before completion
    results, saved, done, _ = await engine.execute(filtered, ctx)
    assert len(results) <= 4
    for mappings in results:
        assert mappings['?p'] == 'http://schema.org/eligibleRegion'
        assert mappings['?o'] in expected_regions
    first_batch = len(results)
    # resume from the saved plan with a generous quantum and drain the rest
    ctx['quantum'] = 10e7
    resumed = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'), ctx)
    results, saved, done, _ = await engine.execute(resumed, ctx)
    assert first_batch + len(results) == 4
    for mappings in results:
        assert mappings['?p'] == 'http://schema.org/eligibleRegion'
        assert mappings['?o'] in expected_regions
    assert done
async def test_filter_iterator_interrupt():
    """Filter evaluation survives preemption (legacy API): the two batches sum to 4 results."""
    expected_regions = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]
    expression = "?p = <http://schema.org/eligibleRegion>"
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    source = ProjectionIterator(ScanIterator(it, triple, card))
    filtered = FilterIterator(source, expression)
    # first run: tiny quantum forces an interruption before completion
    results, saved, done, _ = await engine.execute(filtered, 10e-7, 2)
    assert len(results) <= 4
    for mappings in results:
        assert mappings['?p'] == 'http://schema.org/eligibleRegion'
        assert mappings['?o'] in expected_regions
    first_batch = len(results)
    # resume from the saved plan and drain the remaining solutions
    resumed = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'))
    results, saved, done, _ = await engine.execute(resumed, 10e7)
    assert first_batch + len(results) == 4
    for mappings in results:
        assert mappings['?p'] == 'http://schema.org/eligibleRegion'
        assert mappings['?o'] in expected_regions
    assert done
def _initInnerLoop(self, triple: Dict[str, str], mappings: Optional[Dict[str, str]], last_read: Optional[str] = None) -> Optional[PreemptableIterator]:
    """Create an iterator that evaluates an inner loop of the Index Loop join algorithm.

    Args:
      * triple: Triple pattern to join with.
      * mappings: Input solution mappings for the join, used to instantiate
        the pattern's variables. `None` yields an empty inner loop.
      * last_read: An offset ID used to resume processing of an inner loop.

    Returns:
      An iterator used to evaluate the inner loop, or `None` when the
      instantiated pattern matches nothing in the graph.
    """
    if mappings is None:
        return EmptyIterator(triple)
    # instantiate the pattern's variables using the current solution mappings
    subj = find_in_mappings(triple['subject'], mappings)
    pred = find_in_mappings(triple['predicate'], mappings)
    obj = find_in_mappings(triple['object'], mappings)
    iterator, card = self._graph.search(subj, pred, obj, last_read=last_read, as_of=self._start_timestamp)
    # no matching triples: signal the caller with None rather than an empty scan
    if card == 0:
        return None
    return ScanIterator(iterator, tuple_to_triple(subj, pred, obj), card)
async def test_operation_filter_iterator():
    """A tautological arithmetic filter (legacy API) keeps all 9 scanned solutions."""
    expression = "10 = 5 * 2"
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    projected = ProjectionIterator(ScanIterator(it, triple, card))
    filtered = FilterIterator(projected, expression)
    results, saved, done, _ = await engine.execute(filtered, math.inf)
    assert len(results) == 9
async def test_function_filter_iterator():
    """A filter combining SPARQL built-ins (isLiteral, isNumeric) keeps exactly one solution."""
    expression = '?p = <http://purl.org/goodrelations/price> && isLiteral(?o) && !isNumeric(?o)'
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    projected = ProjectionIterator(ScanIterator(it, triple, card))
    filtered = FilterIterator(projected, expression)
    results, saved, done, _ = await engine.execute(filtered, math.inf)
    assert len(results) == 1
def _initInnerLoop(self, triple, mappings, last_read=None):
    """Create the ScanIterator evaluating one inner loop of the index join.

    Args:
      * triple: Triple pattern to join with.
      * mappings: Solution mappings used to instantiate the pattern's variables.
      * last_read: An offset ID used to resume processing of an inner loop.

    Returns:
      A ScanIterator over the instantiated pattern, or `None` when the
      pattern matches nothing in the document.
    """
    # instantiate the pattern's variables using the current solution mappings
    subj = apply_bindings(triple['subject'], mappings)
    pred = apply_bindings(triple['predicate'], mappings)
    obj = apply_bindings(triple['object'], mappings)
    iterator, card = self._hdtDocument.search(subj, pred, obj, last_read=last_read)
    if card == 0:
        return None
    return ScanIterator(iterator, tuple_to_triple(subj, pred, obj), card)
async def test_rowbind():
    """A BIND of an MD5 digest expression produces at least one solution and terminates."""
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    scan = ScanIterator(it, triple, card)
    bind = BindIterator(scan, "MD5(CONCAT(STR(?s),STR('http://isa'),STR(?o)))", '?z')
    results, saved, done, _ = await engine.execute(bind, 10e7)
    assert len(results) > 0
    assert done
def load_scan(saved_plan, dataset):
    """Rebuild a ScanIterator from its protobuf serialization."""
    triple = saved_plan.triple
    graph = dataset.get_graph(triple.graph)
    # resume the scan from the last triple read before the plan was saved
    iterator, _card = graph.search(triple.subject, triple.predicate, triple.object, last_read=saved_plan.last_read)
    return ScanIterator(iterator, protoTriple_to_dict(triple), saved_plan.cardinality)
async def test_projection_read_stopped():
    """A preempted projection only exposes the projected variable ?common."""
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    proj = ProjectionIterator(ScanIterator(it, triple, card), ['?common'])
    results, saved, done, _ = await engine.execute(proj, 10e-4)
    assert len(results) <= card
    for mappings in results:
        assert '?common' in mappings
        assert '?s1' not in mappings
async def test_nlj_read():
    """A fully-executed nested-loop join (legacy API) yields exactly 20 complete solutions."""
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    outer = ScanIterator(it, triple, card)
    nlj = IndexJoinIterator(outer, innerTriple, hdtDoc)
    results, saved, done, _ = await engine.execute(nlj, 10e7)
    assert len(results) == 20
    # every solution must bind all three join variables
    for mappings in results:
        assert '?s1' in mappings and '?s2' in mappings and '?common' in mappings
    assert done
async def test_rowbind_join():
    """A BIND feeding an index join produces at least one solution and terminates."""
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    scan = ScanIterator(it, triple, card)
    bind = BindIterator(scan, "URI(CONCAT('http://',MD5(CONCAT(STR(?s),STR('http://isa'),STR(?o)))))", '?z')
    nlj = IndexJoinIterator(bind, innerTriple, hdtDoc)
    results, saved, done, _ = await engine.execute(nlj, 10e7)
    assert len(results) > 0
    assert done
async def test_and_or_filter_iterator():
    """A filter mixing && and || keeps exactly the two matching eligibleRegion values."""
    ctx = {'quantum': 10e7, 'max_results': 10e7}
    expression = "?p = <http://schema.org/eligibleRegion> && (?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country0> || ?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country9>)"
    source = ProjectionIterator(ScanIterator(hdtDoc, triple, ctx), ctx)
    filtered = FilterIterator(source, expression, ctx)
    results, saved, done, _ = await engine.execute(filtered, ctx)
    assert len(results) == 2
    for mappings in results:
        assert mappings['?p'] == 'http://schema.org/eligibleRegion'
        assert mappings['?o'] in [
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
        ]
def load_scan(saved_plan: SavedScanIterator, dataset: Dataset) -> PreemptableIterator:
    """Rebuild a ScanIterator from its protobuf serialization.

    Args:
      * saved_plan: Saved query execution plan.
      * dataset: RDF dataset used to execute the plan.

    Returns:
      The pipeline of iterators used to continue query execution.
    """
    triple = saved_plan.triple
    graph = dataset.get_graph(triple.graph)
    # resume the scan from the last triple read before the plan was saved
    iterator, _card = graph.search(triple.subject, triple.predicate, triple.object, last_read=saved_plan.last_read)
    return ScanIterator(iterator, protoTriple_to_dict(triple), saved_plan.cardinality, saved_plan.progress)
async def test_and_or_filter_iterator():
    """A filter mixing && and || (legacy API) keeps exactly the two matching values."""
    expression = "?p = <http://schema.org/eligibleRegion> && (?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country0> || ?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country9>)"
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    projected = ProjectionIterator(ScanIterator(it, triple, card))
    filtered = FilterIterator(projected, expression)
    results, saved, done, _ = await engine.execute(filtered, math.inf)
    assert len(results) == 2
    for mappings in results:
        assert mappings['?p'] == 'http://schema.org/eligibleRegion'
        assert mappings['?o'] in [
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
            'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
        ]
def load_scan(saved_plan: SavedScanIterator, dataset: Dataset, context: dict) -> PreemptableIterator:
    """Rebuild a ScanIterator from its protobuf serialization.

    Args:
      * saved_plan: Saved query execution plan.
      * dataset: RDF dataset used to execute the plan.
      * context: Information about the query execution.

    Returns:
      The pipeline of iterators used to continue query execution.
    """
    pattern = protoTriple_to_dict(saved_plan.pattern)
    connector = dataset.get_graph(pattern['graph'])
    # restore the snapshot timestamp, if reads were pinned to a dataset version
    if saved_plan.timestamp:
        as_of = datetime.fromisoformat(saved_plan.timestamp)
    else:
        as_of = None
    # protobuf maps are empty (not None) when unset: normalize to None
    current_mappings = dict(saved_plan.muc) if len(saved_plan.muc) > 0 else None
    mu = dict(saved_plan.mu) if len(saved_plan.mu) > 0 else None
    return ScanIterator(connector, pattern, context, current_mappings=current_mappings, mu=mu, last_read=saved_plan.last_read, as_of=as_of)
def build_left_join_tree(
    bgp: List[Dict[str, str]],
    dataset: Dataset,
    default_graph: str,
    context: dict,
    as_of: Optional[datetime] = None
) -> Tuple[PreemptableIterator, List[str], Dict[str, str]]:
    """Build a Left-linear join tree from a Basic Graph Pattern.

    Args:
      * bgp: Basic Graph pattern used to build the join tree.
      * dataset: RDF dataset on which the BGP is evaluated.
      * default_graph: URI of the default graph used for BGP evaluation.
      * context: Information about the query execution.
      * as_of: A timestamp used to perform all reads against a consistent
        version of the dataset. If `None`, use the latest version of the
        dataset, which does not guarantee snapshot isolation.

    Returns:
      A tuple (`iterator`, `query_vars`, `cardinalities`) where:
      * `iterator` is the root of the Left-linear join tree.
      * `query_vars` is the list of all SPARQL variables found in the BGP.
      * `cardinalities` is the list of estimated cardinalities of all
        triple patterns in the BGP.
    """
    triples = []
    cardinalities = []
    # gather an iterator and a cardinality estimate for each triple pattern
    for triple in bgp:
        # select the graph used to evaluate the pattern
        graph_uri = triple['graph'] if 'graph' in triple and len(triple['graph']) > 0 else default_graph
        triple['graph'] = graph_uri
        if dataset.has_graph(graph_uri):
            it = ScanIterator(dataset.get_graph(graph_uri), triple, context, as_of=as_of)
            c = len(it)
        else:
            it, c = EmptyIterator(), 0
        triples.append({'triple': triple, 'cardinality': c, 'iterator': it})
        cardinalities.append({'triple': triple, 'cardinality': c})
    # sort by ascending cardinality: the most selective pattern drives the pipeline
    triples = sorted(triples, key=lambda v: v['cardinality'])
    pattern = triples.pop(0)
    query_vars = get_vars(pattern['triple'])
    pipeline = pattern['iterator']
    # build the left-linear tree of joins, preferring patterns connected to
    # the variables already bound by the pipeline
    while len(triples) > 0:
        pattern, pos, query_vars = find_connected_pattern(query_vars, triples)
        # no connected pattern = disconnected BGP => pick the first remaining
        # pattern in the BGP (cartesian product)
        if pattern is None:
            pattern = triples[0]
            query_vars = query_vars | get_vars(pattern['triple'])
            pos = 0
        pipeline = IndexJoinIterator(pipeline, pattern['iterator'], context)
        triples.pop(pos)
    return pipeline, query_vars, cardinalities
async def test_scan_save_interrupt():
    """An interrupted scan never returns more solutions than the pattern's cardinality."""
    ctx = {'quantum': 10e7, 'max_results': 1e-3}
    scan = ScanIterator(hdtDoc, triple, ctx)
    results, saved, done, _ = await engine.execute(scan, ctx)
    assert len(results) <= len(scan)
async def test_scan_save_nointerrupt():
    """A scan with generous limits runs to completion.

    NOTE(review): this test has no assertions — it only fails if execution raises.
    """
    ctx = {'quantum': 10e7, 'max_results': 10e7}
    scan = ScanIterator(hdtDoc, triple, ctx)
    results, saved, done, _ = await engine.execute(scan, ctx)
def build_left_plan(bgp, dataset, default_graph):
    """Build a Left-linear tree of joins from a Basic Graph Pattern."""
    triples = []
    cardinalities = []
    # gather an iterator and a cardinality estimate for each triple pattern
    for triple in bgp:
        # select the graph used to evaluate the pattern
        graph_uri = triple['graph'] if 'graph' in triple and len(triple['graph']) > 0 else default_graph
        triple['graph'] = graph_uri
        if dataset.has_graph(graph_uri):
            it, c = dataset.get_graph(graph_uri).search(triple['subject'], triple['predicate'], triple['object'])
        else:
            it, c = EmptyIterator(), 0
        triples.append({'triple': triple, 'cardinality': c, 'iterator': it})
        cardinalities.append({'triple': triple, 'cardinality': c})
    # sort by ascending cardinality: the most selective pattern drives the pipeline
    triples = sorted(triples, key=lambda v: v['cardinality'])
    pattern = triples.pop(0)
    query_vars = get_vars(pattern['triple'])
    # when one variable binds several positions (e.g. ?s rdf:type ?s), rewrite
    # the pattern and enforce equality with a filter over the scan
    eq_expr, new_pattern = equality_variables(pattern['triple']['subject'], pattern['triple']['predicate'], pattern['triple']['object'])
    if eq_expr is not None:
        rewritten = pattern['triple'].copy()
        rewritten["subject"] = new_pattern[0]
        rewritten["predicate"] = new_pattern[1]
        rewritten["object"] = new_pattern[2]
        pipeline = FilterIterator(ScanIterator(pattern['iterator'], rewritten, pattern['cardinality']), eq_expr)
        query_vars = query_vars | get_vars(rewritten)
    else:
        pipeline = ScanIterator(pattern['iterator'], pattern['triple'], pattern['cardinality'])
    # build the left-linear tree of joins, preferring connected patterns
    while len(triples) > 0:
        pattern, pos, query_vars = find_connected_pattern(query_vars, triples)
        # no connected pattern = disconnected BGP => pick the first remaining pattern
        if pattern is None:
            pattern = triples[0]
            query_vars = query_vars | get_vars(pattern['triple'])
            pos = 0
        graph_uri = pattern['triple']['graph']
        pipeline = IndexJoinIterator(pipeline, pattern['triple'], dataset.get_graph(graph_uri))
        triples.pop(pos)
    return pipeline, query_vars, cardinalities
async def test_scan_save_interrupt():
    """An interrupted scan (legacy API) never exceeds the pattern's cardinality."""
    it, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    scan = ScanIterator(it, triple, card)
    results, saved, done, _ = await engine.execute(scan, 1e-3)
    assert len(results) <= card