def test_scan_save_interrupt():
    """Execute a scan with a very small quota and bound the number of results.

    The scan is run through ``engine.execute`` with ``1e-3`` as second
    argument (presumably a time quantum -- confirm against the engine), and
    the number of produced results must not exceed the cardinality reported
    by ``hdtDoc.search_triples`` for the pattern.
    """
    subj, pred, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search_triples(subj, pred, obj)
    scan = ScanIterator(source, triple, cardinality)
    # the saved plan and the completion flag are not inspected by this test
    results, _saved, _done = engine.execute(scan, 1e-3)
    assert len(results) <= cardinality
def test_filter_iterator_interrupt():
    """Run a filtered scan in two steps: a tiny-quota run, then a resumed run.

    The first execution uses ``10e-7`` as second argument to
    ``engine.execute`` and may yield at most 4 bindings.  The saved plan is
    then serialized, reloaded with ``load`` and executed again with ``10e7``;
    both runs together must yield exactly 4 bindings and the second run must
    report completion.  Every binding produced by either run must satisfy the
    filter expression and carry one of the four expected objects.
    """
    expression = "?p = <http://schema.org/eligibleRegion>"
    expected_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]

    def check_bindings(bindings):
        # each solution must match the filtered predicate and a known object
        for binding in bindings:
            assert binding['?p'] == 'http://schema.org/eligibleRegion'
            assert binding['?o'] in expected_objects

    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projection = ProjectionIterator(ScanIterator(source, triple, cardinality))
    pipeline = FilterIterator(projection, expression)

    # first run: very small quota
    first_batch, saved, _done = engine.execute(pipeline, 10e-7)
    assert len(first_batch) <= 4
    check_bindings(first_batch)
    first_count = len(first_batch)

    # second run: resume from the serialized plan with a very large quota
    reloaded = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'))
    second_batch, _saved, done = engine.execute(reloaded, 10e7)
    assert len(second_batch) + first_count == 4
    check_bindings(second_batch)
    assert done
def test_operation_filter_iterator():
    """Filter a projected scan with the variable-free expression ``10 = 5 * 2``.

    The pipeline is executed with ``math.inf`` as second argument to
    ``engine.execute`` and is expected to produce exactly 9 results.
    """
    expression = "10 = 5 * 2"
    subj, pred, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search_triples(subj, pred, obj)
    projection = ProjectionIterator(ScanIterator(source, triple, cardinality))
    pipeline = FilterIterator(projection, expression)
    # only the produced results matter here
    results, _saved, _done = engine.execute(pipeline, math.inf)
    assert len(results) == 9
def test_function_filter_iterator():
    """Filter a projected scan with an expression using built-in functions.

    The expression combines an equality on ``?p`` with ``isLiteral(?o)`` and
    ``!isNumeric(?o)``; executed with ``math.inf`` as second argument to
    ``engine.execute``, exactly one result is expected.
    """
    expression = '?p = <http://purl.org/goodrelations/price> && isLiteral(?o) && !isNumeric(?o)'
    subj, pred, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search_triples(subj, pred, obj)
    projection = ProjectionIterator(ScanIterator(source, triple, cardinality))
    pipeline = FilterIterator(projection, expression)
    results, _saved, _done = engine.execute(pipeline, math.inf)
    assert len(results) == 1
def build_left_plan(bgp, db_connector, source=None, base_vars=None):
    """Build a left-linear tree of joins/left-joins from a BGP/OPTIONAL BGP.

    Args:
        bgp: iterable of triple patterns, each a mapping with the keys
            'subject', 'predicate' and 'object'.
        db_connector: object exposing ``search_triples(s, p, o)``, which
            returns an ``(iterator, cardinality)`` pair.
        source: optional iterator used as the leftmost input of the tree.
            When omitted, a ScanIterator over the pattern with the lowest
            cardinality is used instead.
        base_vars: variables already bound by ``source``; only read when
            ``source`` is provided.

    Returns:
        A tuple ``(iterator, query_vars, cardinalities)`` where
        ``cardinalities`` lists one ``{'triple', 'cardinality'}`` entry per
        pattern, in BGP order.
    """
    # collect, for every pattern, its iterator and its cardinality
    remaining = []
    cardinalities = []
    for tp in bgp:
        tp_iterator, tp_card = db_connector.search_triples(
            tp['subject'], tp['predicate'], tp['object'])
        remaining.append({'triple': tp, 'cardinality': tp_card, 'iterator': tp_iterator})
        cardinalities.append({'triple': tp, 'cardinality': tp_card})

    # lowest cardinality first (stable sort, ties keep BGP order)
    remaining.sort(key=lambda entry: entry['cardinality'])

    if source is not None:
        # continue from the iterator supplied by the caller
        plan = source
        query_vars = base_vars
    else:
        # no input iterator: start with a scan over the first sorted pattern
        first = remaining.pop(0)
        plan = ScanIterator(first['iterator'], first['triple'], first['cardinality'])
        query_vars = get_vars(first['triple'])

    # chain the remaining patterns as index joins
    while remaining:
        candidate, index, query_vars = find_connected_pattern(query_vars, remaining)
        if candidate is None:
            # disconnected BGP: fall back to the first remaining pattern
            candidate = remaining[0]
            query_vars = query_vars | get_vars(candidate['triple'])
            index = 0
        plan = IndexJoinIterator(plan, candidate['triple'], db_connector)
        remaining.pop(index)

    return plan, query_vars, cardinalities
def test_scan_read():
    """Execute a scan with a very large quota and expect it to complete.

    With ``10e7`` as second argument to ``engine.execute``, the scan must
    yield exactly as many results as the cardinality reported by
    ``hdtDoc.search_triples`` and must be flagged as done.
    """
    subj, pred, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search_triples(subj, pred, obj)
    scan = ScanIterator(source, triple, cardinality)
    results, _saved, done = engine.execute(scan, 10e7)
    assert len(results) == cardinality
    assert done
def test_projection_read_stopped():
    """Project a scan on ``?common`` under a small quota and check the output.

    The projection is executed with ``10e-4`` as second argument to
    ``engine.execute``.  It may yield at most ``card`` results, and each
    result must contain ``?common`` but not ``?s1``.
    """
    subj, pred, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search_triples(subj, pred, obj)
    projection = ProjectionIterator(ScanIterator(source, triple, cardinality), ['?common'])
    results, _saved, _done = engine.execute(projection, 10e-4)
    assert len(results) <= cardinality
    for solution in results:
        # only the projected variable may survive
        assert '?common' in solution and '?s1' not in solution
def test_nlj_interrupt():
    """Execute an index join under a small quota and bound the result count.

    The join of the scanned pattern with ``innerTriple`` is run with
    ``10e-5`` as second argument to ``engine.execute`` and must not yield
    more than 20 results.
    """
    subj, pred, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search_triples(subj, pred, obj)
    outer = ScanIterator(source, triple, cardinality)
    join = IndexJoinIterator(outer, innerTriple, hdtDoc)
    results, _saved, _done = engine.execute(join, 10e-5)
    assert len(results) <= 20
def load_scan(saved_plan, dataset):
    """Rebuild a ScanIterator from its protobuf serialization.

    Args:
        saved_plan: saved scan exposing ``triple`` (with ``subject``,
            ``predicate``, ``object`` and ``graph``), ``offset`` and
            ``cardinality``.
        dataset: object whose ``get_graph(graph)`` returns a connector
            exposing ``search_triples(s, p, o, offset=...)``.

    Returns:
        A ScanIterator over the saved pattern, restarted at the saved offset.
    """
    saved_triple = saved_plan.triple
    subject = saved_triple.subject
    predicate = saved_triple.predicate
    obj = saved_triple.object
    graph = dataset.get_graph(saved_triple.graph)
    resume_at = int(saved_plan.offset)
    # the cardinality returned by the search is ignored: the saved one is reused
    iterator, _card = graph.search_triples(subject, predicate, obj, offset=resume_at)
    return ScanIterator(iterator, protoTriple_to_dict(saved_triple), saved_plan.cardinality)
def load_scan(saved_plan, db_connector):
    """Rebuild a ScanIterator from its protobuf serialization.

    Args:
        saved_plan: saved scan exposing ``triple`` (with ``subject``,
            ``predicate`` and ``object``), ``offset`` and ``cardinality``.
        db_connector: object exposing ``search_triples(s, p, o, offset=...)``.

    Returns:
        A ScanIterator over the saved pattern, restarted at the saved offset.
    """
    saved_triple = saved_plan.triple
    resume_at = int(saved_plan.offset)
    # the cardinality returned by the search is ignored: the saved one is reused
    iterator, _card = db_connector.search_triples(
        saved_triple.subject,
        saved_triple.predicate,
        saved_triple.object,
        offset=resume_at,
    )
    return ScanIterator(iterator, protoTriple_to_dict(saved_triple), saved_plan.cardinality)
def test_nlj_read():
    """Execute an index join with a very large quota and expect completion.

    With ``10e7`` as second argument to ``engine.execute``, the join of the
    scanned pattern with ``innerTriple`` must yield exactly 20 results, each
    binding ``?s1``, ``?s2`` and ``?common``, and must be flagged as done.
    """
    subj, pred, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search_triples(subj, pred, obj)
    outer = ScanIterator(source, triple, cardinality)
    join = IndexJoinIterator(outer, innerTriple, hdtDoc)
    results, _saved, done = engine.execute(join, 10e7)
    assert len(results) == 20
    for solution in results:
        # every solution must bind all three variables
        assert '?s1' in solution and '?s2' in solution and '?common' in solution
    assert done
def _initInnerLoop(self, triple, mappings, offset=0):
    """Create the inner-loop scan for a triple pattern under given mappings.

    The mappings are applied to the subject, predicate and object of
    ``triple`` and the resulting pattern is searched in ``self._hdtDocument``
    starting at ``offset``.

    Returns:
        A ScanIterator over the bound pattern, or None when the search
        reports a cardinality of 0.
    """
    s, p, o = (
        apply_bindings(triple[position], mappings)
        for position in ('subject', 'predicate', 'object')
    )
    iterator, card = self._hdtDocument.search_triples(s, p, o, offset=offset)
    if card != 0:
        return ScanIterator(iterator, tuple_to_triple(s, p, o), card)
    # nothing matches the bound pattern: there is no inner loop to run
    return None
def test_and_or_filter_iterator():
    """Filter a projected scan with an expression mixing ``&&`` and ``||``.

    Executed with ``math.inf`` as second argument to ``engine.execute``, the
    pipeline must yield exactly 2 bindings, each with the expected predicate
    and with Country0 or Country9 as object.
    """
    expression = "?p = <http://schema.org/eligibleRegion> && (?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country0> || ?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country9>)"
    expected_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]
    subj, pred, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search_triples(subj, pred, obj)
    projection = ProjectionIterator(ScanIterator(source, triple, cardinality))
    pipeline = FilterIterator(projection, expression)
    results, _saved, _done = engine.execute(pipeline, math.inf)
    assert len(results) == 2
    for binding in results:
        assert binding['?p'] == 'http://schema.org/eligibleRegion'
        assert binding['?o'] in expected_objects
def build_left_plan(bgp, dataset, default_graph):
    """Build a left-linear tree of joins from a BGP.

    Args:
        bgp: iterable of triple patterns, each a mapping with the keys
            'subject', 'predicate', 'object' and optionally 'graph'.  Each
            pattern's 'graph' entry is overwritten in place with the graph
            selected for its evaluation.
        dataset: object exposing ``has_graph(uri)`` and ``get_graph(uri)``;
            graphs expose ``search_triples(s, p, o)``, which returns an
            ``(iterator, cardinality)`` pair.
        default_graph: graph URI used for patterns whose 'graph' entry is
            missing or empty.

    Returns:
        A tuple ``(pipeline, query_vars, cardinalities)`` where
        ``cardinalities`` lists one ``{'triple', 'cardinality'}`` entry per
        pattern, in BGP order.
    """
    remaining = []
    cardinalities = []

    # analyze each triple pattern of the BGP
    for tp in bgp:
        # choose the graph against which the pattern is evaluated
        if 'graph' in tp and len(tp['graph']) > 0:
            graph_uri = tp['graph']
        else:
            graph_uri = default_graph
        tp['graph'] = graph_uri

        # fetch the iterator and the cardinality of the pattern
        if not dataset.has_graph(graph_uri):
            # unknown graph: the pattern matches nothing
            tp_iterator, tp_card = EmptyIterator(), 0
        else:
            tp_iterator, tp_card = dataset.get_graph(graph_uri).search_triples(
                tp['subject'], tp['predicate'], tp['object'])

        remaining.append({'triple': tp, 'cardinality': tp_card, 'iterator': tp_iterator})
        cardinalities.append({'triple': tp, 'cardinality': tp_card})

    # lowest cardinality first (stable sort, ties keep BGP order)
    remaining.sort(key=lambda entry: entry['cardinality'])

    # the pipeline starts with a scan over the first sorted pattern
    first = remaining.pop(0)
    pipeline = ScanIterator(first['iterator'], first['triple'], first['cardinality'])
    query_vars = get_vars(first['triple'])

    # chain the remaining patterns as index joins
    while remaining:
        candidate, index, query_vars = find_connected_pattern(query_vars, remaining)
        if candidate is None:
            # disconnected BGP: fall back to the first remaining pattern
            candidate = remaining[0]
            query_vars = query_vars | get_vars(candidate['triple'])
            index = 0
        # NOTE(review): get_graph is called here without the has_graph check
        # done above -- confirm what it returns for an unknown graph
        join_graph = dataset.get_graph(candidate['triple']['graph'])
        pipeline = IndexJoinIterator(pipeline, candidate['triple'], join_graph)
        remaining.pop(index)

    return pipeline, query_vars, cardinalities