Esempio n. 1
0
async def test_nlj_interrupt():
    """Run an index join with `max_results` set to 10e-5 and check that at most 20 results come back."""
    context = dict(quantum=10e7, max_results=10e-5)
    outer = ScanIterator(hdtDoc, triple, context)
    inner = ScanIterator(hdtDoc, innerTriple, context)
    join = IndexJoinIterator(outer, inner, context)
    results, _saved, _done, _ = await engine.execute(join, context)
    assert len(results) <= 20
Esempio n. 2
0
async def test_nlj_read():
    """Run an index join to completion and check its 20 solutions bind every expected variable."""
    context = dict(quantum=10e7, max_results=10e7)
    join = IndexJoinIterator(
        ScanIterator(hdtDoc, triple, context),
        ScanIterator(hdtDoc, innerTriple, context),
        context,
    )
    results, _saved, done, _ = await engine.execute(join, context)
    assert len(results) == 20
    expected_vars = ('?s1', '?s2', '?common')
    for res in results:
        assert all(var in res for var in expected_vars)
    assert done
Esempio n. 3
0
async def test_nlj_interrupt():
    """Run an index join with 10e-5 passed to `engine.execute` and check at most 20 results come back."""
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search(subject, predicate, obj)
    outer = ScanIterator(source, triple, cardinality)
    join = IndexJoinIterator(outer, innerTriple, hdtDoc)
    results, _saved, _done, _ = await engine.execute(join, 10e-5)
    assert len(results) <= 20
Esempio n. 4
0
async def test_scan_read():
    """Execute a scan and check that it yields at least one result and reports completion."""
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, cardinality = hdtDoc.search(subject, predicate, obj)
    scan = ScanIterator(source, triple, cardinality)
    results, _saved, done, _ = await engine.execute(scan, 10e7)
    assert len(results) > 0
    assert done
async def test_operation_filter_iterator():
    """Filter a projected scan with the constant expression "10 = 5 * 2" and expect 9 results."""
    context = dict(quantum=10e7, max_results=10e7)
    projected = ProjectionIterator(ScanIterator(hdtDoc, triple, context), context)
    filtered = FilterIterator(projected, "10 = 5 * 2", context)
    results, _saved, _done, _ = await engine.execute(filtered, context)
    assert len(results) == 9
async def test_filter_iterator_interrupt():
    """Run a filtered scan with a very small quantum, resume it from the saved plan, and check
    that both runs together yield the four expected solutions."""
    expected_predicate = 'http://schema.org/eligibleRegion'
    allowed_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]

    def check_bindings(solutions):
        # every solution must carry the filtered predicate and one of the known objects
        for b in solutions:
            assert b['?p'] == expected_predicate
            assert b['?o'] in allowed_objects

    context = {'quantum': 10e-7, 'max_results': 10e7}
    expression = "?p = <http://schema.org/eligibleRegion>"
    scan = ProjectionIterator(ScanIterator(hdtDoc, triple, context), context)
    iterator = FilterIterator(scan, expression, context)
    first_batch, saved, done, _ = await engine.execute(iterator, context)
    assert len(first_batch) <= 4
    check_bindings(first_batch)

    # second run: raise the quantum and continue from the serialized plan
    context['quantum'] = 10e7
    reloaded = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'), context)
    second_batch, saved, done, _ = await engine.execute(reloaded, context)
    assert len(second_batch) + len(first_batch) == 4
    check_bindings(second_batch)
    assert done
Esempio n. 7
0
async def test_filter_iterator_interrupt():
    """Run a filtered scan with 10e-7 passed to `engine.execute`, resume it from the saved plan,
    and check that both runs together yield the four expected solutions."""
    expected_predicate = 'http://schema.org/eligibleRegion'
    allowed_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]

    def check_bindings(solutions):
        # every solution must carry the filtered predicate and one of the known objects
        for b in solutions:
            assert b['?p'] == expected_predicate
            assert b['?o'] in allowed_objects

    expression = "?p = <http://schema.org/eligibleRegion>"
    source, card = hdtDoc.search(triple['subject'], triple['predicate'], triple['object'])
    projected = ProjectionIterator(ScanIterator(source, triple, card))
    filtered = FilterIterator(projected, expression)
    first_batch, saved, done, _ = await engine.execute(filtered, 10e-7, 2)
    assert len(first_batch) <= 4
    check_bindings(first_batch)

    # second run: continue from the serialized plan
    reloaded = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'))
    second_batch, saved, done, _ = await engine.execute(reloaded, 10e7)
    assert len(second_batch) + len(first_batch) == 4
    check_bindings(second_batch)
    assert done
Esempio n. 8
0
 def _initInnerLoop(self,
                    triple: Dict[str, str],
                    mappings: Optional[Dict[str, str]],
                    last_read: Optional[str] = None) -> Optional[PreemptableIterator]:
     """Create an iterator that evaluates an inner loop of the Index Loop join algorithm.

     Args:
       * triple: Triple pattern to join with.
       * mappings: Input solution mappings for the join, or `None`.
       * last_read: An offset ID used to resume processing of an inner loop.

     Returns:
       An `EmptyIterator` when `mappings` is `None`; `None` when the graph reports a
       cardinality of zero for the bound pattern; otherwise a `ScanIterator` over the
       bound pattern.
     """
     if mappings is None:
         return EmptyIterator(triple)
     # resolve each term of the pattern against the current mappings
     (s, p, o) = (find_in_mappings(triple['subject'], mappings),
                  find_in_mappings(triple['predicate'], mappings),
                  find_in_mappings(triple['object'], mappings))
     iterator, card = self._graph.search(s,
                                         p,
                                         o,
                                         last_read=last_read,
                                         as_of=self._start_timestamp)
     # callers receive None (not an empty iterator) when nothing matches
     if card == 0:
         return None
     return ScanIterator(iterator, tuple_to_triple(s, p, o), card)
Esempio n. 9
0
async def test_operation_filter_iterator():
    """Filter a projected scan with the constant expression "10 = 5 * 2" and expect 9 results."""
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, card = hdtDoc.search(subject, predicate, obj)
    projected = ProjectionIterator(ScanIterator(source, triple, card))
    filtered = FilterIterator(projected, "10 = 5 * 2")
    results, _saved, _done, _ = await engine.execute(filtered, math.inf)
    assert len(results) == 9
Esempio n. 10
0
async def test_function_filter_iterator():
    """Filter a projected scan with an expression using `isLiteral`/`isNumeric` and expect 1 result."""
    expression = '?p = <http://purl.org/goodrelations/price> && isLiteral(?o) && !isNumeric(?o)'
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, card = hdtDoc.search(subject, predicate, obj)
    projected = ProjectionIterator(ScanIterator(source, triple, card))
    filtered = FilterIterator(projected, expression)
    results, _saved, _done, _ = await engine.execute(filtered, math.inf)
    assert len(results) == 1
Esempio n. 11
0
 def _initInnerLoop(self, triple, mappings, last_read=None):
     """Build the scan for one inner loop of the join; return None when the search reports zero cardinality."""
     # apply the current bindings to each term of the pattern
     s = apply_bindings(triple['subject'], mappings)
     p = apply_bindings(triple['predicate'], mappings)
     o = apply_bindings(triple['object'], mappings)
     iterator, card = self._hdtDocument.search(s, p, o, last_read=last_read)
     return None if card == 0 else ScanIterator(iterator, tuple_to_triple(s, p, o), card)
Esempio n. 12
0
async def test_rowbind():
    """Bind an MD5-based expression to `?z` on top of a scan and check the pipeline yields results and completes."""
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, card = hdtDoc.search(subject, predicate, obj)
    scan = ScanIterator(source, triple, card)
    bind = BindIterator(scan, "MD5(CONCAT(STR(?s),STR('http://isa'),STR(?o)))", '?z')
    results, _saved, done, _ = await engine.execute(bind, 10e7)
    assert len(results) > 0
    assert done
Esempio n. 13
0
def load_scan(saved_plan, dataset):
    """Rebuild a ScanIterator from its protobuf serialization.

    The search is restarted with `last_read` taken from the saved plan, and the
    cardinality stored in the saved plan is reused instead of the one returned
    by the new search.
    """
    saved_triple = saved_plan.triple
    s = saved_triple.subject
    p = saved_triple.predicate
    o = saved_triple.object
    g = saved_triple.graph
    graph = dataset.get_graph(g)
    iterator, _ = graph.search(s, p, o, last_read=saved_plan.last_read)
    pattern = protoTriple_to_dict(saved_triple)
    return ScanIterator(iterator, pattern, saved_plan.cardinality)
Esempio n. 14
0
async def test_projection_read_stopped():
    """Project a scan onto `?common` with 10e-4 passed to `engine.execute` and check the projected output."""
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, card = hdtDoc.search(subject, predicate, obj)
    proj = ProjectionIterator(ScanIterator(source, triple, card), ['?common'])
    results, _saved, _done, _ = await engine.execute(proj, 10e-4)
    assert len(results) <= card
    for res in results:
        # only the projected variable may survive
        assert '?common' in res
        assert '?s1' not in res
Esempio n. 15
0
async def test_nlj_read():
    """Run an index join to completion and check its 20 solutions bind every expected variable."""
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, card = hdtDoc.search(subject, predicate, obj)
    join = IndexJoinIterator(ScanIterator(source, triple, card), innerTriple, hdtDoc)
    results, _saved, done, _ = await engine.execute(join, 10e7)
    assert len(results) == 20
    expected_vars = ('?s1', '?s2', '?common')
    for res in results:
        assert all(var in res for var in expected_vars)
    assert done
Esempio n. 16
0
async def test_rowbind_join():
    """Join a bind-over-scan pipeline with `innerTriple` and check it yields results and completes."""
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, card = hdtDoc.search(subject, predicate, obj)
    scan = ScanIterator(source, triple, card)
    bind_expression = "URI(CONCAT('http://',MD5(CONCAT(STR(?s),STR('http://isa'),STR(?o)))))"
    bind = BindIterator(scan, bind_expression, '?z')
    join = IndexJoinIterator(bind, innerTriple, hdtDoc)

    results, _saved, done, _ = await engine.execute(join, 10e7)
    assert len(results) > 0
    assert done
Esempio n. 17
0
async def test_and_or_filter_iterator():
    """Filter a projected scan with a combined `&&` / `||` expression and expect the two matching solutions."""
    allowed_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]
    context = dict(quantum=10e7, max_results=10e7)
    expression = "?p = <http://schema.org/eligibleRegion> && (?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country0> || ?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country9>)"
    projected = ProjectionIterator(ScanIterator(hdtDoc, triple, context), context)
    filtered = FilterIterator(projected, expression, context)
    results, _saved, _done, _ = await engine.execute(filtered, context)
    assert len(results) == 2
    for b in results:
        assert b['?p'] == 'http://schema.org/eligibleRegion'
        assert b['?o'] in allowed_objects
Esempio n. 18
0
def load_scan(saved_plan: SavedScanIterator, dataset: Dataset) -> PreemptableIterator:
    """Rebuild a ScanIterator from its protobuf serialization.

    Args:
      * saved_plan: Saved query execution plan.
      * dataset: RDF dataset used to execute the plan.

    Returns:
      A ScanIterator restarted at `saved_plan.last_read`, carrying the cardinality
      and progress stored in the saved plan.
    """
    saved_triple = saved_plan.triple
    s = saved_triple.subject
    p = saved_triple.predicate
    o = saved_triple.object
    g = saved_triple.graph
    graph = dataset.get_graph(g)
    # the cardinality returned by the new search is ignored; the saved one is reused
    iterator, _ = graph.search(s, p, o, last_read=saved_plan.last_read)
    pattern = protoTriple_to_dict(saved_triple)
    return ScanIterator(iterator, pattern, saved_plan.cardinality, saved_plan.progress)
Esempio n. 19
0
async def test_and_or_filter_iterator():
    """Filter a projected scan with a combined `&&` / `||` expression and expect the two matching solutions."""
    allowed_objects = [
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9'
    ]
    expression = "?p = <http://schema.org/eligibleRegion> && (?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country0> || ?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country9>)"
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, card = hdtDoc.search(subject, predicate, obj)
    projected = ProjectionIterator(ScanIterator(source, triple, card))
    filtered = FilterIterator(projected, expression)
    results, _saved, _done, _ = await engine.execute(filtered, math.inf)
    assert len(results) == 2
    for b in results:
        assert b['?p'] == 'http://schema.org/eligibleRegion'
        assert b['?o'] in allowed_objects
Esempio n. 20
0
def load_scan(saved_plan: SavedScanIterator, dataset: Dataset, context: dict) -> PreemptableIterator:
    """Rebuild a ScanIterator from its protobuf serialization.

    Args:
      * saved_plan: Saved query execution plan.
      * dataset: RDF dataset used to execute the plan.
      * context: Information about the query execution.

    Returns:
      A ScanIterator over the saved pattern, restored with the saved mappings,
      last read position and (when one was saved) the snapshot timestamp.
    """
    pattern = protoTriple_to_dict(saved_plan.pattern)
    connector = dataset.get_graph(pattern['graph'])

    # an absent or empty timestamp means no snapshot time is passed on
    has_timestamp = saved_plan.timestamp is not None and saved_plan.timestamp != ''
    as_of = datetime.fromisoformat(saved_plan.timestamp) if has_timestamp else None

    # empty saved mappings are passed on as None rather than as empty dicts
    current_mappings = dict(saved_plan.muc) if len(saved_plan.muc) > 0 else None
    mu = dict(saved_plan.mu) if len(saved_plan.mu) > 0 else None

    return ScanIterator(
        connector,
        pattern,
        context,
        current_mappings=current_mappings,
        mu=mu,
        last_read=saved_plan.last_read,
        as_of=as_of,
    )
Esempio n. 21
0
def build_left_join_tree(
    bgp: List[Dict[str, str]],
    dataset: Dataset,
    default_graph: str,
    context: dict,
    as_of: Optional[datetime] = None
) -> Tuple[PreemptableIterator, List[str], Dict[str, str]]:
    """Build a Left-linear join tree from a Basic Graph pattern.

    Each triple pattern in `bgp` has its 'graph' key set in place: the existing
    value is kept when present and non-empty, otherwise `default_graph` is used.

    Args:
      * bgp: Basic Graph pattern used to build the join tree.
      * dataset: RDF dataset on which the BGP is evaluated.
      * default_graph: URI of the default graph used for BGP evaluation.
      * context: Information about the query execution.
      * as_of: A timestamp used to perform all reads against a consistent version of the dataset. If `None`, use the latest version of the dataset, which does not guarantee snapshot isolation.

    Returns: A tuple (`iterator`, `query_vars`, `cardinalities`) where:
      * `iterator` is the root of the Left-linear join tree.
      * `query_vars` is the collection of SPARQL variables gathered with `get_vars` / `find_connected_pattern` while building the tree.
      * `cardinalities` is a list with one `{'triple': ..., 'cardinality': ...}` entry per triple pattern, in BGP order.

    Raises:
      IndexError: if `bgp` is empty, since there is no pattern to start the pipeline with.
    """
    # gather metadata about triple patterns
    triples = []
    cardinalities = []

    # analyze each triple pattern in the BGP
    for triple in bgp:
        # select the graph used to evaluate the pattern
        graph_uri = triple['graph'] if 'graph' in triple and len(
            triple['graph']) > 0 else default_graph
        triple['graph'] = graph_uri
        # get iterator and statistics about the pattern
        if dataset.has_graph(graph_uri):
            it = ScanIterator(dataset.get_graph(graph_uri),
                              triple,
                              context,
                              as_of=as_of)
            c = it.__len__()
        else:
            # unknown graph: the pattern contributes an empty iterator
            it, c = EmptyIterator(), 0
        triples.append({'triple': triple, 'cardinality': c, 'iterator': it})
        cardinalities.append({'triple': triple, 'cardinality': c})

    # sort triples by ascending cardinality
    triples = sorted(triples, key=lambda v: v['cardinality'])

    # start the pipeline with the Scan with the most selective pattern
    # NOTE(review): no equality filter is added here for patterns that repeat a
    # variable (e.g. ?s rdf:type ?s) - confirm this is handled elsewhere.
    pattern = triples.pop(0)
    query_vars = get_vars(pattern['triple'])
    pipeline = pattern['iterator']

    # build the left linear tree of joins
    while len(triples) > 0:
        pattern, pos, query_vars = find_connected_pattern(query_vars, triples)
        # no connected pattern = disconnected BGP => pick the first remaining pattern in the BGP
        if pattern is None:
            pattern = triples[0]
            query_vars = query_vars | get_vars(pattern['triple'])
            pos = 0
        pipeline = IndexJoinIterator(pipeline, pattern['iterator'], context)
        triples.pop(pos)
    return pipeline, query_vars, cardinalities
Esempio n. 22
0
async def test_scan_save_interrupt():
    """Execute a scan with `max_results` set to 1e-3 and check it never yields more than the scan's length."""
    context = dict(quantum=10e7, max_results=1e-3)
    scan = ScanIterator(hdtDoc, triple, context)
    results, _saved, _done, _ = await engine.execute(scan, context)
    assert len(results) <= scan.__len__()
Esempio n. 23
0
async def test_scan_save_nointerrupt():
    """Execute a scan with large quantum and result limits; the test only checks that execution does not raise."""
    context = dict(quantum=10e7, max_results=10e7)
    scan = ScanIterator(hdtDoc, triple, context)
    _results, _saved, _done, _ = await engine.execute(scan, context)
Esempio n. 24
0
def build_left_plan(bgp, dataset, default_graph):
    """Build a left-linear tree of joins from a BGP.

    Returns a tuple (pipeline, query_vars, cardinalities). Each triple pattern in
    `bgp` has its 'graph' key set in place (the default graph is used when the
    pattern carries no non-empty graph).
    """
    candidates = []
    cardinalities = []

    # collect an iterator and a cardinality for every triple pattern
    for triple in bgp:
        has_own_graph = 'graph' in triple and len(triple['graph']) > 0
        graph_uri = triple['graph'] if has_own_graph else default_graph
        triple['graph'] = graph_uri
        if not dataset.has_graph(graph_uri):
            it, c = EmptyIterator(), 0
        else:
            it, c = dataset.get_graph(graph_uri).search(
                triple['subject'], triple['predicate'], triple['object'])
        candidates.append({'triple': triple, 'cardinality': c, 'iterator': it})
        cardinalities.append({'triple': triple, 'cardinality': c})

    # most selective pattern first
    candidates.sort(key=lambda entry: entry['cardinality'])

    first = candidates.pop(0)
    first_triple = first['triple']
    query_vars = get_vars(first_triple)

    # a pattern that repeats a variable is rewritten and guarded by an equality filter
    # example: ?s rdf:type ?s => Filter(Scan(?s rdf:type ?s_2), ?s == ?s_2)
    eq_expr, new_pattern = equality_variables(first_triple['subject'],
                                              first_triple['predicate'],
                                              first_triple['object'])
    if eq_expr is None:
        pipeline = ScanIterator(first['iterator'], first_triple,
                                first['cardinality'])
    else:
        # work on a copy so the caller's pattern keeps its original terms
        rewritten = first_triple.copy()
        for position, key in enumerate(("subject", "predicate", "object")):
            rewritten[key] = new_pattern[position]
        scan = ScanIterator(first['iterator'], rewritten, first['cardinality'])
        pipeline = FilterIterator(scan, eq_expr)
        query_vars = query_vars | get_vars(rewritten)

    # chain the remaining patterns as index joins, preferring connected ones
    while candidates:
        pattern, pos, query_vars = find_connected_pattern(query_vars, candidates)
        if pattern is None:
            # disconnected BGP: fall back to the first remaining pattern
            pattern = candidates[0]
            query_vars = query_vars | get_vars(pattern['triple'])
            pos = 0
        graph = dataset.get_graph(pattern['triple']['graph'])
        pipeline = IndexJoinIterator(pipeline, pattern['triple'], graph)
        candidates.pop(pos)
    return pipeline, query_vars, cardinalities
Esempio n. 25
0
async def test_scan_save_interrupt():
    """Execute a scan with 1e-3 passed to `engine.execute` and check it never yields more than the search cardinality."""
    subject, predicate, obj = triple['subject'], triple['predicate'], triple['object']
    source, card = hdtDoc.search(subject, predicate, obj)
    scan = ScanIterator(source, triple, card)
    results, _saved, _done, _ = await engine.execute(scan, 1e-3)
    assert len(results) <= card