def translateAggregates(q, M):
    """Collect aggregates from a query and wrap *M* in an AggregateJoin.

    Walks the projection, HAVING and ORDER BY parts of the parsed query
    *q*, replacing bare variables with Aggregate_Sample nodes and
    collecting every aggregate into ``A``.  Returns the new algebra node
    and a list ``E`` of (var, expr) extensions to apply afterwards.
    """
    E = []  # (result-var, original-var/expr) pairs to Extend with later
    A = []  # all aggregates found, in traversal order

    # collect/replace aggs in :
    #    select expr as ?var
    if q.projection:
        for v in q.projection:
            if v.evar:
                v.expr = traverse(v.expr, functools.partial(_sample, v=v.evar))
                v.expr = traverse(v.expr, functools.partial(_aggs, A=A))

    # having clause
    if traverse(q.having, _hasAggregate, complete=False):
        q.having = traverse(q.having, _sample)
        traverse(q.having, functools.partial(_aggs, A=A))

    # order by
    if traverse(q.orderby, _hasAggregate, complete=False):
        q.orderby = traverse(q.orderby, _sample)
        traverse(q.orderby, functools.partial(_aggs, A=A))

    # sample all other select vars
    # TODO: only allowed for vars in group-by?
    if q.projection:
        for v in q.projection:
            if v.var:
                # synthesize a fresh variable to carry the sampled value
                rv = Variable("__agg_%d__" % (len(A) + 1))
                A.append(CompValue("Aggregate_Sample", vars=v.var, res=rv))
                E.append((rv, v.var))

    return CompValue("AggregateJoin", A=A, p=M), E
def configure_query_dataset(parsed_query, default_graphs, named_graphs):
    """Substitute the default and named graph URI.

    According to https://www.w3.org/TR/sparql11-protocol/ we will remove
    the named and default graph URIs given in the query string (if given)
    and will add default-graph-uri and named-graph-uri from protocol
    request.

    Args:
        parsed_query: the parsed query
        default_graphs: a list of uri strings for default graphs
        named_graphs: a list of uri strings for named graphs
    """
    # ignore malformed (non-list) protocol parameters
    if not isinstance(default_graphs, list) or not isinstance(
            named_graphs, list):
        return parsed_query

    # nothing supplied by the protocol request: keep the query's dataset
    if len(default_graphs) == 0 and len(named_graphs) == 0:
        return parsed_query

    # clean existing named (FROM NAMED) and default (FROM) DatasetClauses
    parsed_query[1]['datasetClause'] = plist()

    # add new default (default-graph-uri) and named (named-graph-uri)
    # DatasetClauses from Protocol
    for uri in default_graphs:
        parsed_query[1]['datasetClause'].append(
            CompValue('DatasetClause', default=URIRef(uri)))
    for uri in named_graphs:
        parsed_query[1]['datasetClause'].append(
            CompValue('DatasetClause', named=URIRef(uri)))

    return parsed_query
def configure_update_dataset(parsed_update, default_graphs, named_graphs):
    """Add default and named graph URI.

    According to https://www.w3.org/TR/sparql11-protocol/ we will add
    using-named-graph-uri and using-graph-uri if the update request does
    not contain a USING, USING NAMED, or WITH clause.

    Args:
        parsed_update: the parsed update
        default_graphs: a list of uri strings for default graphs
        named_graphs: a list of uri strings for named graphs

    Raises:
        SparqlProtocolError: if the update already carries a WITH or
            USING clause (the protocol forbids combining them with
            request-supplied graphs).
    """
    # ignore malformed (non-list) protocol parameters
    if not isinstance(default_graphs, list) or not isinstance(
            named_graphs, list):
        return parsed_update

    # nothing supplied by the protocol request: keep the update as-is
    if len(default_graphs) == 0 and len(named_graphs) == 0:
        return parsed_update

    if parsed_update.request[0].withClause is not None:
        raise SparqlProtocolError

    if parsed_update.request[0].using is not None:
        raise SparqlProtocolError

    parsed_update.request[0]['using'] = plist()

    # add new default (using-graph-uri) and named (using-named-graph-uri)
    # UsingClauses from Protocol
    for uri in default_graphs:
        parsed_update.request[0]['using'].append(
            CompValue('UsingClause', default=URIRef(uri)))
    for uri in named_graphs:
        parsed_update.request[0]['using'].append(
            CompValue('UsingClause', named=URIRef(uri)))

    return parsed_update
def translateQuery(q, base=None, initNs=None): """ Translate a query-parsetree to a SPARQL Algebra Expression Return a rdflib.plugins.sparql.sparql.Query object """ # We get in: (prologue, query) prologue = translatePrologue(q[0], base, initNs) # absolutize/resolve prefixes q[1] = traverse(q[1], visitPost=functools.partial(translatePName, prologue=prologue)) P, PV = translate(q[1]) datasetClause = q[1].datasetClause if q[1].name == "ConstructQuery": template = triples(q[1].template) if q[1].template else None res = CompValue(q[1].name, p=P, template=template, datasetClause=datasetClause) else: res = CompValue(q[1].name, p=P, datasetClause=datasetClause, PV=PV) res = traverse(res, visitPost=simplify) _traverseAgg(res, visitor=analyse) _traverseAgg(res, _addVars) return Query(prologue, res)
def _sample(e, v=None):
    """
    For each unaggregated variable V in expr
    Replace V with Sample(V)

    *v* names a variable that must be left as-is (the SELECT alias).
    Returns a replacement node, or None to keep *e* unchanged.
    """
    if isinstance(e, CompValue):
        if e.name.startswith("Aggregate_"):
            # already an aggregate -- do not replace vars inside it
            return e
    elif isinstance(e, Variable) and e != v:
        return CompValue("Aggregate_Sample", vars=e)
def _prolog_conditional_expression(name, args, env, pe, var_map, kb):
    """Build a two-operand conditional (and/or) filter expression node."""
    if len(args) != 2:
        raise PrologError(
            '_prolog_conditional_expression %s: 2 args expected.' % name)
    left = prolog_to_filter_expression(args[0], env, pe, var_map, kb)
    right = prolog_to_filter_expression(args[1], env, pe, var_map, kb)
    return CompValue(name,
                     expr=left,
                     other=[right],
                     _vars=set(var_map.values()))
def test_query_no_vars(self):
    """A SELECT algebra with only ground triples yields exactly one row."""
    triples = [(rdflib.URIRef('http://dbpedia.org/resource/Helmut_Kohl'),
                rdflib.URIRef('http://dbpedia.org/property/deputy'),
                rdflib.URIRef('http://dbpedia.org/resource/Klaus_Kinkel'))]
    # hand-built algebra: SELECT over a single-triple BGP, no projection vars
    algebra = CompValue('SelectQuery',
                        p=CompValue('BGP', triples=triples, _vars=set()),
                        datasetClause=None, PV=[], _vars=set())
    res = self.sas.query_algebra(algebra)
    self.assertEqual(len(res), 1)
    # log the (empty) bindings of each result row for debugging
    for row in res:
        s = ''
        for v in res.vars:
            s += ' %s=%s' % (v, row[v])
        logging.debug('algebra result row: %s' % s)
def _prolog_relational_expression(op, args, env, pe, var_map, kb):
    """Build a binary RelationalExpression node with operator *op*."""
    if len(args) != 2:
        raise PrologError('_prolog_relational_expression: 2 args expected.')
    return CompValue(
        'RelationalExpression',
        op=op,
        expr=prolog_to_filter_expression(args[0], env, pe, var_map, kb),
        other=prolog_to_filter_expression(args[1], env, pe, var_map, kb),
        _vars=set(var_map.values()))
def translateValues(v):
    """Translate a VALUES clause into a 'values' algebra node.

    NOTE: returns a plain empty list (not a CompValue) when the clause
    has no variables or no values -- callers must handle both shapes.
    """
    # if len(v.var)!=len(v.value):
    #   raise Exception("Unmatched vars and values in ValueClause: "+str(v))
    if not v.var or not v.value:
        return []

    if isinstance(v.value[0], list):
        # multi-var form: each entry is one row of bindings
        rows = [dict(zip(v.var, vals)) for vals in v.value]
    else:
        # single-var shorthand: each value binds the one variable
        rows = [{v.var[0]: val} for val in v.value]

    return CompValue('values', res=rows)
def prolog_to_filter_expression(e, env, pe, var_map, kb):
    """Convert a Prolog term *e* into a SPARQL filter expression tree.

    Predicates with known operator names become relational/conditional
    expression nodes; anything else falls through to pl_to_rdf.
    """
    if isinstance(e, Predicate):
        # binary comparisons, mapped from Prolog spelling to SPARQL spelling
        relational = {
            '=': '=',
            '\\=': '!=',
            '<': '<',
            '>': '>',
            '=<': '<=',
            '>=': '>=',
            'is': 'is',
        }
        if e.name in relational:
            return _prolog_relational_expression(
                relational[e.name], e.args, env, pe, var_map, kb)
        if e.name == 'and':
            return _prolog_conditional_expression(
                'ConditionalAndExpression', e.args, env, pe, var_map, kb)
        if e.name == 'or':
            return _prolog_conditional_expression(
                'ConditionalOrExpression', e.args, env, pe, var_map, kb)
        if e.name == 'lang':
            if len(e.args) != 1:
                raise PrologError(
                    'lang filter expression: one argument expected.')
            return CompValue(
                'Builtin_LANG',
                arg=prolog_to_filter_expression(
                    e.args[0], env, pe, var_map, kb),
                _vars=set(var_map.values()))
    return pl_to_rdf(e, env, pe, var_map, kb)
def Minus(p1, p2):
    """Algebra node: solutions of *p1* minus those compatible with *p2*."""
    node = CompValue("Minus", p1=p1, p2=p2)
    return node
def Join(p1, p2):
    """Algebra node joining the solutions of *p1* and *p2*."""
    node = CompValue("Join", p1=p1, p2=p2)
    return node
def Values(res):
    """Algebra node for an inline VALUES block with rows *res*."""
    node = CompValue("values", res=res)
    return node
def Filter(expr, p):
    """Algebra node filtering pattern *p* by expression *expr*."""
    node = CompValue("Filter", expr=expr, p=p)
    return node
def Graph(term, graph):
    """Algebra node evaluating pattern *graph* within graph *term*."""
    node = CompValue("Graph", term=term, p=graph)
    return node
def Extend(p, expr, var):
    """Algebra node binding *expr* to *var* on top of pattern *p*."""
    node = CompValue('Extend', p=p, expr=expr, var=var)
    return node
def Filter(expr, p):
    """Algebra node filtering pattern *p* by expression *expr*."""
    node = CompValue('Filter', expr=expr, p=p)
    return node
def BGP(triples=None):
    """Basic graph pattern node; a falsy triple list becomes empty."""
    ts = triples or []
    return CompValue('BGP', triples=ts)
def Values(res):
    """Algebra node for an inline VALUES block with rows *res*."""
    node = CompValue('values', res=res)
    return node
def BGP(triples=None):
    """Basic graph pattern node; a falsy triple list becomes empty."""
    ts = triples or []
    return CompValue("BGP", triples=ts)
def translate(q):
    """
    http://www.w3.org/TR/sparql11-query/#convertSolMod

    Translate one parsed query form into an algebra expression *M* and
    the list of projected variables *PV*, applying grouping, aggregates,
    HAVING, VALUES, ORDER BY, projection and slicing in spec order.
    """
    _traverse(q, _simplifyFilters)

    q.where = traverse(q.where, visitPost=translatePath)

    # TODO: Var scope test
    VS = set()
    traverse(q.where, functools.partial(_findVars, res=VS))

    # all query types have a where part
    M = translateGroupGraphPattern(q.where)

    aggregate = False
    if q.groupby:
        conditions = []
        # convert "GROUP BY (?expr as ?var)" to an Extend
        for c in q.groupby.condition:
            if isinstance(c, CompValue) and c.name == "GroupAs":
                M = Extend(M, c.expr, c.var)
                c = c.var
            conditions.append(c)

        M = Group(p=M, expr=conditions)
        aggregate = True
    elif (traverse(q.having, _hasAggregate, complete=False) or
          traverse(q.orderby, _hasAggregate, complete=False) or
          any(traverse(x.expr, _hasAggregate, complete=False)
              for x in q.projection or [] if x.evar)):
        # if any aggregate is used, implicit group by
        M = Group(p=M)
        aggregate = True

    if aggregate:
        M, E = translateAggregates(q, M)
    else:
        E = []

    # HAVING
    if q.having:
        M = Filter(expr=and_(*q.having.condition), p=M)

    # VALUES
    if q.valuesClause:
        M = Join(p1=M, p2=ToMultiSet(translateValues(q.valuesClause)))

    if not q.projection:
        # select *
        PV = list(VS)
    else:
        PV = list()
        for v in q.projection:
            if v.var:
                if v not in PV:
                    PV.append(v.var)
            elif v.evar:
                if v not in PV:
                    PV.append(v.evar)
                # expression aliases become Extends applied below
                E.append((v.expr, v.evar))
            else:
                raise Exception("I expected a var or evar here!")

    for e, v in E:
        M = Extend(M, e, v)

    # ORDER BY
    if q.orderby:
        M = OrderBy(
            M,
            [
                CompValue("OrderCondition", expr=c.expr, order=c.order)
                for c in q.orderby.condition
            ],
        )

    # PROJECT
    M = Project(M, PV)

    if q.modifier:
        if q.modifier == "DISTINCT":
            M = CompValue("Distinct", p=M)
        elif q.modifier == "REDUCED":
            M = CompValue("Reduced", p=M)

    if q.limitoffset:
        offset = 0
        if q.limitoffset.offset is not None:
            offset = q.limitoffset.offset.toPython()

        if q.limitoffset.limit is not None:
            M = CompValue("Slice", p=M, start=offset,
                          length=q.limitoffset.limit.toPython())
        else:
            M = CompValue("Slice", p=M, start=offset)

    return M, PV
def Project(p, PV):
    """Algebra node projecting pattern *p* onto the variables in *PV*."""
    node = CompValue('Project', p=p, PV=PV)
    return node
def LeftJoin(p1, p2, expr):
    """Algebra node: OPTIONAL join of *p1* with *p2* under *expr*."""
    node = CompValue("LeftJoin", p1=p1, p2=p2, expr=expr)
    return node
def Group(p, expr=None):
    """Algebra node grouping pattern *p* by conditions *expr* (if any)."""
    node = CompValue('Group', p=p, expr=expr)
    return node
def Extend(p, expr, var):
    """Algebra node binding *expr* to *var* on top of pattern *p*."""
    node = CompValue("Extend", p=p, expr=expr, var=var)
    return node
def OrderBy(p, expr):
    """Algebra node ordering pattern *p* by the conditions in *expr*."""
    node = CompValue("OrderBy", p=p, expr=expr)
    return node
def Project(p, PV):
    """Algebra node projecting pattern *p* onto the variables in *PV*."""
    node = CompValue("Project", p=p, PV=PV)
    return node
def ToMultiSet(p):
    """Algebra node converting the solutions of *p* into a multiset."""
    node = CompValue("ToMultiSet", p=p)
    return node
def Group(p, expr=None):
    """Algebra node grouping pattern *p* by conditions *expr* (if any)."""
    node = CompValue("Group", p=p, expr=expr)
    return node
def Union(p1, p2):
    """Algebra node uniting the solutions of *p1* and *p2*."""
    node = CompValue("Union", p1=p1, p2=p2)
    return node