Exemple #1
0
def test_filter_iterator_interrupt():
    """A FilterIterator can be stopped, serialized, reloaded and resumed.

    The pipeline is first executed with a very small budget (10e-7), so it
    may stop before producing every solution. Its saved state is then
    reloaded and executed with a very large budget (10e7). Taken together,
    both runs must yield exactly 4 bindings, all of which satisfy the filter.
    """
    # NOTE(review): the second argument of engine.execute is presumably a
    # time quantum -- confirm the unit against the engine implementation.
    region_predicate = 'http://schema.org/eligibleRegion'
    allowed_countries = (
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country1',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country4',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9',
    )

    def check_bindings(bindings):
        # every solution must carry the filtered predicate and a known country
        for binding in bindings:
            assert binding['?p'] == region_predicate
            assert binding['?o'] in allowed_countries

    expression = "?p = <http://schema.org/eligibleRegion>"
    source, cardinality = hdtDoc.search_triples(triple['subject'], triple['predicate'], triple['object'])
    projected_scan = ProjectionIterator(ScanIterator(source, triple, cardinality))
    pipeline = FilterIterator(projected_scan, expression)

    # first run: tiny budget, so only part of the solutions may be produced
    first_batch, saved, _ = engine.execute(pipeline, 10e-7)
    assert len(first_batch) <= 4
    check_bindings(first_batch)
    first_count = len(first_batch)

    # second run: resume from the serialized state with a huge budget
    resumed = load(saved.SerializeToString(), DummyDataset(hdtDoc, 'watdiv100'))
    second_batch, saved, done = engine.execute(resumed, 10e7)
    assert len(second_batch) + first_count == 4
    check_bindings(second_batch)
    assert done
Exemple #2
0
def test_operation_filter_iterator():
    """A filter made only of an arithmetic comparison is evaluated by FilterIterator.

    The expression "10 = 5 * 2" references no query variable; the test
    expects 9 bindings to come out of the pipeline.
    """
    source, cardinality = hdtDoc.search_triples(triple['subject'], triple['predicate'], triple['object'])
    projected_scan = ProjectionIterator(ScanIterator(source, triple, cardinality))
    pipeline = FilterIterator(projected_scan, "10 = 5 * 2")
    # math.inf as budget: the execution is not meant to be interrupted
    bindings, _, _ = engine.execute(pipeline, math.inf)
    assert len(bindings) == 9
Exemple #3
0
def test_function_filter_iterator():
    """A filter calling SPARQL functions (isLiteral, isNumeric) is evaluated by FilterIterator.

    The expression keeps bindings whose predicate is goodrelations:price and
    whose object is a literal that is not numeric; exactly 1 is expected.
    """
    source, cardinality = hdtDoc.search_triples(triple['subject'], triple['predicate'], triple['object'])
    projected_scan = ProjectionIterator(ScanIterator(source, triple, cardinality))
    pipeline = FilterIterator(
        projected_scan,
        '?p = <http://purl.org/goodrelations/price> && isLiteral(?o) && !isNumeric(?o)')
    # math.inf as budget: the execution is not meant to be interrupted
    bindings, _, _ = engine.execute(pipeline, math.inf)
    assert len(bindings) == 1
Exemple #4
0
def build_query_plan(query, db_connector, saved_plan=None, projection=None):
    """Build the pipeline of iterators used to evaluate a query.

    Args:
        * query - Mapping describing the query. query['type'] must be
          'union' or 'bgp', and the entry of the same name holds the
          patterns handed to the matching plan builder. An optional
          'filters' entry holds the filter expressions to apply on top.
        * db_connector - Passed to the plan builders, and to load() when a
          saved plan is given.
        * saved_plan - If not None, the plan is reloaded from it with load()
          and the query itself is not inspected.
        * projection - Forwarded to the plan builders.

    Returns:
        A tuple (root, cardinalities): the root iterator of the pipeline and
        the cardinalities reported by the plan builder. When the plan is
        reloaded from saved_plan, cardinalities is an empty list.

    Raises:
        Exception - if query['type'] is neither 'union' nor 'bgp'.
    """
    if saved_plan is not None:
        return load(saved_plan, db_connector), []

    if query['type'] == 'union':
        root, cardinalities = build_union_plan(query['union'], db_connector,
                                               projection)
    elif query['type'] == 'bgp':
        root, cardinalities = build_join_plan(query['bgp'],
                                              db_connector,
                                              projection=projection)
    else:
        raise Exception('Unkown query type found during query optimization')

    # Apply the filter clause(s), if any. Empty expressions are dropped
    # first: joining them would yield a malformed conjunction such as
    # "() && (x)", or an empty filter when "" is the only entry.
    raw_filters = query['filters'] if 'filters' in query else []
    filters = [expr for expr in raw_filters if expr]
    if filters:
        # reduce all filters in a conjunctive expression
        expression = reduce(lambda x, y: "({}) && ({})".format(x, y), filters)
        root = FilterIterator(root, expression)
    return root, cardinalities
Exemple #5
0
def test_and_or_filter_iterator():
    """A filter mixing && and || is evaluated by FilterIterator.

    Only bindings with the eligibleRegion predicate and an object equal to
    Country0 or Country9 may pass; exactly 2 are expected.
    """
    accepted_countries = (
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country0',
        'http://db.uwaterloo.ca/~galuc/wsdbm/Country9',
    )
    expression = "?p = <http://schema.org/eligibleRegion> && (?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country0> || ?o = <http://db.uwaterloo.ca/~galuc/wsdbm/Country9>)"
    source, cardinality = hdtDoc.search_triples(triple['subject'], triple['predicate'], triple['object'])
    projected_scan = ProjectionIterator(ScanIterator(source, triple, cardinality))
    pipeline = FilterIterator(projected_scan, expression)
    # math.inf as budget: the execution is not meant to be interrupted
    bindings, _, _ = engine.execute(pipeline, math.inf)
    assert len(bindings) == 2
    for binding in bindings:
        assert binding['?p'] == 'http://schema.org/eligibleRegion'
        assert binding['?o'] in accepted_countries
Exemple #6
0
def parse_query_node(node, dataset, current_graphs, server_url, cardinalities):
    """
        Translate a node of the query logical plan, and recursively its children, into physical iterators.

        Args:
            * node - Node of the logical plan to translate (in rdflib format)
            * dataset - RDF dataset used to execute the query
            * current_graphs - List of IRI of the RDF graph(s) currently queried
            * server_url - URL of the SaGe server
            * cardinalities - Accumulator extended in place (with +=) with the
              triple pattern cardinalities reported by build_left_plan

        Returns:
            The iterator built for this node.

        Raises:
            UnsupportedSPARQL - if the node name is not one of SelectQuery,
            Project, BGP, Union, Filter or Join.
    """
    kind = node.name

    if kind == 'SelectQuery':
        # a FROM clause replaces the default graphs used below this node
        if node.datasetClause is None:
            graphs = current_graphs
        else:
            graphs = [format_graph_uri(format_term(clause.default), server_url) for clause in node.datasetClause]
        return parse_query_node(node.p, dataset, graphs, server_url, cardinalities)

    if kind == 'Project':
        projected_vars = ['?' + str(variable) for variable in node._vars]
        return ProjectionIterator(
            parse_query_node(node.p, dataset, current_graphs, server_url, cardinalities),
            projected_vars)

    if kind == 'BGP':
        patterns = list(localize_triple(node.triples, current_graphs))
        plan, _, pattern_cardinalities = build_left_plan(patterns, dataset, current_graphs)
        # keep track of the cardinality of every triple pattern
        cardinalities += pattern_cardinalities
        return plan

    if kind == 'Union':
        left_branch = parse_query_node(node.p1, dataset, current_graphs, server_url, cardinalities)
        right_branch = parse_query_node(node.p2, dataset, current_graphs, server_url, cardinalities)
        return BagUnionIterator(left_branch, right_branch)

    if kind == 'Filter':
        # the expression is parsed before the child plan is built
        filter_expression = parse_filter_expr(node.expr)
        filtered_source = parse_query_node(node.p, dataset, current_graphs, server_url, cardinalities)
        return FilterIterator(filtered_source, filter_expression)

    if kind == 'Join':
        # joins are only supported between BGPs from different GRAPH clauses:
        # the triples of both operands are merged into a single plan
        left_triples = fetch_graph_triples(node.p1, current_graphs, server_url)
        right_triples = fetch_graph_triples(node.p2, current_graphs, server_url)
        plan, _, pattern_cardinalities = build_left_plan(left_triples + right_triples, dataset, current_graphs)
        # keep track of the cardinality of every triple pattern
        cardinalities += pattern_cardinalities
        return plan

    raise UnsupportedSPARQL("Unsupported SPARQL feature: {}".format(kind))
def build_query_plan(query, dataset, default_graph, saved_plan=None):
    """Assemble the iterator pipeline that evaluates a query.

    A saved plan, when given, is reloaded with load() and returned with an
    empty list of cardinalities. Otherwise the plan is built from
    query['type'] ('union' or 'bgp'), and the non-empty entries of
    query['filters'], if any, are applied on top as one conjunction.

    Returns a tuple (root iterator, cardinalities). Raises Exception when
    query['type'] is neither 'union' nor 'bgp'.
    """
    if saved_plan is not None:
        return load(saved_plan, dataset), []

    query_type = query['type']
    if query_type == 'union':
        plan = build_union_plan(query['union'], dataset, default_graph)
    elif query_type == 'bgp':
        plan = build_join_plan(query['bgp'], dataset, default_graph)
    else:
        raise Exception('Unkown query type found during query optimization')
    root, cardinalities = plan

    # no filter clause at all: the plan is complete
    if 'filters' not in query or len(query['filters']) == 0:
        return root, cardinalities

    # ignore empty strings, then AND the remaining filters together
    non_empty = [expr for expr in query['filters'] if len(expr) > 0]
    if non_empty:
        conjunction = reduce(lambda left, right: "({}) && ({})".format(left, right), non_empty)
        root = FilterIterator(root, conjunction)
    return root, cardinalities
Exemple #8
0
def load_filter(saved_plan, dataset):
    """Rebuild a FilterIterator from its protobuf serialization.

    The source iterator is stored in the 'source' oneof group of the
    message: the field that is set is looked up, loaded with load(), and
    wrapped again with the saved filter expression.
    """
    source_field = saved_plan.WhichOneof('source')
    saved_source = getattr(saved_plan, source_field)
    return FilterIterator(load(saved_source, dataset), saved_plan.expression)