Exemple #1
0
def test_scan_save_interrupt():
    """Execute a scan with a very small budget and bound the result count.

    A ScanIterator is built over the ``triple`` pattern defined outside this
    function and handed to ``engine.execute`` with ``1e-3`` as its second
    argument (presumably a time quota -- confirm against the engine).
    However many results come back, there must be no more of them than the
    cardinality reported by ``search_triples``.
    """
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    scan_op = ScanIterator(source, triple, cardinality)
    produced, _saved, _done = engine.execute(scan_op, 1e-3)
    assert len(produced) <= cardinality
Exemple #2
0
def test_filter_iterator_interrupt():
    """Interrupt a filtered scan, reload its saved state, and finish it.

    The first ``engine.execute`` call uses ``10e-7`` as its second argument
    and may return only part of the answer (at most 4 bindings).  The saved
    state is then serialized, reloaded through ``load`` and executed again
    with ``10e7``; both batches together must contain exactly 4 bindings,
    every binding must match the expected predicate/object values, and the
    second run must report completion.
    """
    wanted_predicate = 'http://schema.org/eligibleRegion'
    wanted_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]

    def check_bindings(bindings):
        # the same per-binding checks apply to both the partial and final batch
        for row in bindings:
            assert row['?p'] == wanted_predicate
            assert row['?o'] in wanted_objects

    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projected = ProjectionIterator(ScanIterator(source, triple, cardinality))
    filtered = FilterIterator(projected, "?p = <http://schema.org/eligibleRegion>")

    # first run: may be interrupted before all bindings are produced
    first_batch, saved, _done = engine.execute(filtered, 10e-7)
    assert len(first_batch) <= 4
    check_bindings(first_batch)

    # second run: resume from the serialized state and run to completion
    resumed = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'))
    second_batch, _saved, done = engine.execute(resumed, 10e7)
    assert len(second_batch) + len(first_batch) == 4
    check_bindings(second_batch)
    assert done
Exemple #3
0
def test_operation_filter_iterator():
    """Filter a projected scan with the constant expression ``10 = 5 * 2``.

    The pipeline is executed with ``math.inf`` as the second argument of
    ``engine.execute`` and is expected to yield exactly 9 results.
    """
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projected = ProjectionIterator(ScanIterator(source, triple, cardinality))
    filtered = FilterIterator(projected, "10 = 5 * 2")
    outcome, _saved, _done = engine.execute(filtered, math.inf)
    assert len(outcome) == 9
Exemple #4
0
def test_function_filter_iterator():
    """Filter a projected scan with an expression using built-in functions.

    The expression combines a predicate equality with ``isLiteral`` and a
    negated ``isNumeric`` on ``?o``.  Executed with ``math.inf`` as the
    second argument of ``engine.execute``, exactly 1 result is expected.
    """
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projected = ProjectionIterator(ScanIterator(source, triple, cardinality))
    filtered = FilterIterator(
        projected,
        '?p = <http://purl.org/goodrelations/price> && isLiteral(?o) && !isNumeric(?o)')
    outcome, _saved, _done = engine.execute(filtered, math.inf)
    assert len(outcome) == 1
Exemple #5
0
def build_left_plan(bgp, db_connector, source=None, base_vars=None):
    """Build a left-linear tree of index joins from a BGP / OPTIONAL BGP.

    Args:
        bgp: Iterable of triple patterns; each is indexed with the keys
            'subject', 'predicate' and 'object'.
        db_connector: Object whose ``search_triples(s, p, o)`` returns an
            ``(iterator, cardinality)`` pair.  It is also passed to every
            IndexJoinIterator that is created.
        source: Optional iterator used as the leftmost input.  When it is
            None, a ScanIterator over the lowest-cardinality pattern is
            used instead.
        base_vars: Initial variable set, used only when ``source`` is given.

    Returns:
        A tuple ``(plan, query_vars, cardinalities)`` where ``plan`` is the
        root iterator, ``query_vars`` the variables accumulated while
        joining, and ``cardinalities`` a list of
        ``{'triple': ..., 'cardinality': ...}`` dicts in BGP order.

    Raises:
        IndexError: If ``source`` is None and ``bgp`` is empty, since there
            is no pattern to start the plan with.
    """
    # probe each pattern once to obtain its iterator and cardinality
    candidates = []
    for tp in bgp:
        tp_iterator, tp_card = db_connector.search_triples(
            tp['subject'], tp['predicate'], tp['object'])
        candidates.append(
            {'triple': tp, 'cardinality': tp_card, 'iterator': tp_iterator})
    # statistics are reported in the original BGP order, so snapshot before sorting
    cardinalities = [
        {'triple': entry['triple'], 'cardinality': entry['cardinality']}
        for entry in candidates
    ]
    # most selective patterns first
    candidates.sort(key=lambda entry: entry['cardinality'])

    if source is not None:
        plan, query_vars = source, base_vars
    else:
        # no input iterator: start from a scan over the most selective pattern
        first = candidates.pop(0)
        plan = ScanIterator(first['iterator'], first['triple'],
                            first['cardinality'])
        query_vars = get_vars(first['triple'])

    # chain the remaining patterns as index joins on the left-deep plan
    while candidates:
        chosen, index, query_vars = find_connected_pattern(query_vars,
                                                           candidates)
        if chosen is None:
            # disconnected BGP: fall back to the first remaining pattern
            chosen, index = candidates[0], 0
            query_vars = query_vars | get_vars(chosen['triple'])
        plan = IndexJoinIterator(plan, chosen['triple'], db_connector)
        candidates.pop(index)
    return plan, query_vars, cardinalities
Exemple #6
0
def test_scan_read():
    """Run a scan with a large budget and expect the complete result set.

    With ``10e7`` as the second argument of ``engine.execute``, the number
    of results must equal the cardinality reported by ``search_triples`` and
    the engine must report that execution is done.
    """
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    scan_op = ScanIterator(source, triple, cardinality)
    produced, _saved, finished = engine.execute(scan_op, 10e7)
    assert len(produced) == cardinality
    assert finished
def test_projection_read_stopped():
    """Project a scan onto ``?common`` under a small execution budget.

    The pipeline is executed with ``10e-4`` as the second argument of
    ``engine.execute``.  At most ``card`` results may come back, and each
    one must contain ``?common`` but not ``?s1``.
    """
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projection = ProjectionIterator(
        ScanIterator(source, triple, cardinality), ['?common'])
    produced, _saved, _done = engine.execute(projection, 10e-4)
    assert len(produced) <= cardinality
    for row in produced:
        assert '?common' in row
        assert '?s1' not in row
Exemple #8
0
def test_nlj_interrupt():
    """Run an index join under a small budget and bound the result count.

    A scan over ``triple`` is joined with ``innerTriple`` and executed with
    ``10e-5`` as the second argument of ``engine.execute``; no more than 20
    results may be returned.
    """
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    outer = ScanIterator(source, triple, cardinality)
    joined = IndexJoinIterator(outer, innerTriple, hdtDoc)
    produced, _saved, _done = engine.execute(joined, 10e-5)
    assert len(produced) <= 20
Exemple #9
0
def load_scan(saved_plan, dataset):
    """Rebuild a ScanIterator from its protobuf serialization.

    Args:
        saved_plan: Saved scan state exposing ``triple`` (with ``subject``,
            ``predicate``, ``object`` and ``graph``), ``offset`` and
            ``cardinality``.
        dataset: Object whose ``get_graph(graph)`` returns a graph that
            supports ``search_triples(s, p, o, offset=...)``.

    Returns:
        A ScanIterator over the pattern, searched from the saved offset and
        carrying the saved cardinality (the cardinality returned by the new
        search is ignored).
    """
    pattern = saved_plan.triple
    subject, predicate, obj, graph_uri = (
        pattern.subject, pattern.predicate, pattern.object, pattern.graph)
    graph = dataset.get_graph(graph_uri)
    # restart the search at the position recorded in the saved plan
    iterator, _ = graph.search_triples(subject, predicate, obj,
                                       offset=int(saved_plan.offset))
    pattern_dict = protoTriple_to_dict(pattern)
    return ScanIterator(iterator, pattern_dict, saved_plan.cardinality)
Exemple #10
0
def load_scan(saved_plan, db_connector):
    """Rebuild a ScanIterator from its protobuf serialization.

    Args:
        saved_plan: Saved scan state exposing ``triple`` (with ``subject``,
            ``predicate`` and ``object``), ``offset`` and ``cardinality``.
        db_connector: Object supporting
            ``search_triples(s, p, o, offset=...)``.

    Returns:
        A ScanIterator over the pattern, searched from the saved offset and
        carrying the saved cardinality (the cardinality returned by the new
        search is ignored).
    """
    pattern = saved_plan.triple
    subject, predicate, obj = pattern.subject, pattern.predicate, pattern.object
    # restart the search at the position recorded in the saved plan
    iterator, _ = db_connector.search_triples(
        subject, predicate, obj, offset=int(saved_plan.offset))
    pattern_dict = protoTriple_to_dict(pattern)
    return ScanIterator(iterator, pattern_dict, saved_plan.cardinality)
Exemple #11
0
def test_nlj_read():
    """Run an index join with a large budget and check the full output.

    A scan over ``triple`` is joined with ``innerTriple`` and executed with
    ``10e7`` as the second argument of ``engine.execute``.  Exactly 20
    results are expected, each binding ``?s1``, ``?s2`` and ``?common``,
    and the engine must report that execution is done.
    """
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    outer = ScanIterator(source, triple, cardinality)
    joined = IndexJoinIterator(outer, innerTriple, hdtDoc)
    produced, _saved, finished = engine.execute(joined, 10e7)
    assert len(produced) == 20
    for row in produced:
        assert '?s1' in row
        assert '?s2' in row
        assert '?common' in row
    assert finished
Exemple #12
0
 def _initInnerLoop(self, triple, mappings, offset=0):
     """Create the inner scan of the join for one set of mappings.

     ``mappings`` is applied to the subject, predicate and object of
     ``triple`` through ``apply_bindings``; the resulting pattern is then
     searched in ``self._hdtDocument`` starting at ``offset``.

     Returns:
         A ScanIterator over the bound pattern, or None when the search
         reports a cardinality of 0.
     """
     bound_terms = [
         apply_bindings(triple[position], mappings)
         for position in ('subject', 'predicate', 'object')
     ]
     subject, predicate, obj = bound_terms
     matches, cardinality = self._hdtDocument.search_triples(
         subject, predicate, obj, offset=offset)
     if cardinality == 0:
         # nothing matches the bound pattern: no inner loop to run
         return None
     bound_pattern = tuple_to_triple(subject, predicate, obj)
     return ScanIterator(matches, bound_pattern, cardinality)
Exemple #13
0
def test_and_or_filter_iterator():
    """Filter a projected scan with a combined ``&&`` / ``||`` expression.

    The expression fixes ``?p`` and accepts either of two values for ``?o``.
    Executed with ``math.inf`` as the second argument of ``engine.execute``,
    exactly 2 results are expected, each carrying the expected predicate and
    one of the two accepted objects.
    """
    accepted_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]
    source, cardinality = hdtDoc.search_triples(
        triple['subject'], triple['predicate'], triple['object'])
    projected = ProjectionIterator(ScanIterator(source, triple, cardinality))
    filtered = FilterIterator(
        projected,
        "?p = <http://schema.org/eligibleRegion> && (?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country0> || ?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country9>)")
    outcome, _saved, _done = engine.execute(filtered, math.inf)
    assert len(outcome) == 2
    for row in outcome:
        assert row['?p'] == 'http://schema.org/eligibleRegion'
        assert row['?o'] in accepted_objects
Exemple #14
0
def build_left_plan(bgp, dataset, default_graph):
    """Build a left-linear tree of index joins from a BGP.

    Args:
        bgp: Iterable of triple patterns (dicts with 'subject', 'predicate'
            and 'object', plus an optional 'graph').  Each pattern's 'graph'
            entry is overwritten in place with the graph actually selected.
        dataset: Object providing ``has_graph(uri)`` and ``get_graph(uri)``;
            graphs must support ``search_triples(s, p, o)``.
        default_graph: Graph URI used for patterns whose 'graph' entry is
            absent or empty.

    Returns:
        A tuple ``(pipeline, query_vars, cardinalities)`` where ``pipeline``
        is the root iterator, ``query_vars`` the variables accumulated while
        joining, and ``cardinalities`` a list of
        ``{'triple': ..., 'cardinality': ...}`` dicts in BGP order.

    Raises:
        IndexError: If ``bgp`` is empty, since there is no pattern to start
            the pipeline with.
    """
    candidates = []
    for tp in bgp:
        # choose the graph this pattern is evaluated against
        if 'graph' in tp and len(tp['graph']) > 0:
            graph_uri = tp['graph']
        else:
            graph_uri = default_graph
        tp['graph'] = graph_uri
        # unknown graphs contribute an empty iterator with zero cardinality
        if dataset.has_graph(graph_uri):
            graph = dataset.get_graph(graph_uri)
            tp_iterator, tp_card = graph.search_triples(
                tp['subject'], tp['predicate'], tp['object'])
        else:
            tp_iterator, tp_card = EmptyIterator(), 0
        candidates.append(
            {'triple': tp, 'cardinality': tp_card, 'iterator': tp_iterator})

    # statistics are reported in the original BGP order, so snapshot before sorting
    cardinalities = [
        {'triple': entry['triple'], 'cardinality': entry['cardinality']}
        for entry in candidates
    ]
    # most selective patterns first
    candidates.sort(key=lambda entry: entry['cardinality'])

    # the pipeline starts with a scan over the most selective pattern
    first = candidates.pop(0)
    plan = ScanIterator(first['iterator'], first['triple'], first['cardinality'])
    query_vars = get_vars(first['triple'])

    # chain the remaining patterns as index joins on the left-deep plan
    while candidates:
        chosen, index, query_vars = find_connected_pattern(query_vars, candidates)
        if chosen is None:
            # disconnected BGP: fall back to the first remaining pattern
            chosen, index = candidates[0], 0
            query_vars = query_vars | get_vars(chosen['triple'])
        target_graph = dataset.get_graph(chosen['triple']['graph'])
        plan = IndexJoinIterator(plan, chosen['triple'], target_graph)
        candidates.pop(index)
    return plan, query_vars, cardinalities