def _variablesToArray(variables, name=''):
    """Normalize a selection argument into an array of query strings.

    A single string or Variable is wrapped into a one-element array. As a
    special case, the value "*" yields None (this corresponds to the
    wildcard in SPARQL).

    @param variables: a string, a unicode, or a Variable, or an array of
        those (can be mixed, actually)
    @param name: the string to be used in the error message
    @raise SPARQLError: if an entry is neither a string nor a Variable
    """
    if isinstance(variables, basestring):
        if variables == "*":
            return None
        return [variables]
    if isinstance(variables, Unbound):
        return [variables.name]
    if type(variables) == list or type(variables) == tuple:
        result = []
        for item in variables:
            if isinstance(item, basestring):
                result.append(item)
            elif isinstance(item, Unbound):
                result.append(item.name)
            else:
                raise SPARQLError(
                    "illegal type in '%s'; must be a string, unicode, or a Variable" % name)
        return result
    raise SPARQLError(
        "'%s' argument must be a string, a Variable, or a list of those" % name)
def query(graph, selection, patterns, optionalPatterns=None, initialBindings=None):
    """
    A shorthand for the creation of a L{Query} instance, returning the
    result of a L{Query.select} right away. Good for most of the usage,
    when no more action (clustering, etc) is required.

    @param graph: the graph to run the query against
    @param selection: a list or tuple with the selection criteria, or a
        single string. Each entry is a string that begins with a "?".
    @param patterns: either a
        L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>} instance or
        a list of instances thereof. Each pattern in the list represent an
        'OR' (or 'UNION') branch in SPARQL.
    @param optionalPatterns: either a
        L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>} instance or
        a list of instances thereof. Each element in the 'patterns'
        parameter is combined with each of the optional patterns and the
        results are concatenated. The list may be empty (the default).
    @param initialBindings: dictionary of initial variable bindings
        (default: no bindings)
    @return: list of query results
    @rtype: list of tuples
    @raise SPARQLError: if no valid query object could be generated
    """
    # fresh containers per call: the old mutable defaults ([] and {}) were
    # shared across every invocation of this function
    if optionalPatterns is None:
        optionalPatterns = []
    if initialBindings is None:
        initialBindings = {}
    result = queryObject(graph, patterns, optionalPatterns, initialBindings)
    if result is None:
        # generate some proper output for the exception :-)
        msg = "Errors in the patterns, no valid query object generated; "
        if isinstance(patterns, GraphPattern):
            msg += ("pattern:\n%s" % patterns)
        else:
            msg += ("pattern:\n%s\netc..." % patterns[0])
        raise SPARQLError(msg)
    return result.select(selection)
def checkArg(arg, error):
    """Normalize a pattern argument into a list of GraphPattern instances.

    @param arg: None, a single GraphPattern, or a list/tuple of
        GraphPattern instances
    @param error: the argument name to be used in the error message
    @return: a (possibly empty) list of GraphPattern instances
    @raise SPARQLError: if arg, or one of its members, has the wrong type
    """
    if arg is None:
        return []
    if isinstance(arg, GraphPattern):
        return [arg]
    # isinstance (rather than an exact type() comparison) also accepts
    # list/tuple subclasses, which behave identically here
    if isinstance(arg, (list, tuple)):
        for p in arg:
            if not isinstance(p, GraphPattern):
                raise SPARQLError(
                    "'%s' argument must be a GraphPattern or a list of those" % error)
        return arg
    raise SPARQLError(
        "'%s' argument must be a GraphPattern or a list of those" % error)
def _generatePattern(self, tupl):
    """Convert a raw pattern tuple into its internal form.

    Possible type literals are converted to real literals on the fly.
    Each tuple should contain either 3 elements (for an RDF Triplet
    pattern) or four, where the fourth element is a per-pattern
    constraint (filter). (The general constraint of SPARQL can be
    optimized by assigning a constraint to a specific pattern; because
    it stops the graph expansion, its usage might be much more optimal
    than the 'global' constraint.)

    @param tupl: either a three or four element tuple
    @raise SPARQLError: if tupl is not a tuple of length 3 or 4
    """
    if type(tupl) != tuple:
        raise SPARQLError(
            "illegal argument, pattern must be a tuple, got %s" % type(tupl))
    if len(tupl) not in (3, 4):
        raise SPARQLError(
            "illegal argument, pattern must be a tuple of 3 or 4 element, got %s" % len(tupl))
    if len(tupl) == 4:
        (subj, pred, obj, constraint) = tupl
    else:
        (subj, pred, obj) = tupl
        constraint = None
    converted = []
    for term in (subj, pred, obj):
        if _isResQuest(term):
            # a query variable: register it once, keep the term as-is
            if term not in self.unbounds:
                self.unbounds.append(term)
            converted.append(term)
        elif isinstance(term, BNode):
            # keep BNodes untouched - BNode name management is handled
            # by the SPARQL parser
            converted.append(term)
        else:
            converted.append(_createResource(term))
    converted.append(constraint)
    return tuple(converted)
def __init__(self, patterns=None):
    """
    @param patterns: an initial list of graph pattern tuples; a single
        tuple is also accepted (default: no patterns)
    @raise SPARQLError: if patterns is neither a tuple nor a list
    """
    self.patterns = []
    self.constraints = []
    self.unbounds = []
    self.bnodes = {}
    # None replaces the old mutable default ([]) so no list object is
    # shared between calls; observable behavior is unchanged
    if patterns is None:
        patterns = []
    if type(patterns) == list:
        self.addPatterns(patterns)
    elif type(patterns) == tuple:
        self.addPattern(patterns)
    else:
        raise SPARQLError(
            "illegal argument, pattern must be a tuple or a list of tuples"
        )
def addConstraint(self, func):
    """Add a global filter constraint to the graph pattern.

    'func' must be a callable with a single input parameter (a binding
    dictionary) returning a boolean. This method is I{added} to
    previously added methods, ie, I{all} methods must return True to
    accept a binding.

    @param func: filter function
    @raise SPARQLError: if func is not callable
    """
    # accept any callable (plain function, lambda, bound method, ...);
    # the previous FunctionType check wrongly rejected bound methods even
    # though the docstring promised to accept them
    if callable(func):
        self.constraints.append(func)
    else:
        raise SPARQLError(
            "illegal argument, constraint must be a function type, got %s" % type(func))
def _checkOptionals(pattern, optionals):
    """Check that optional blocks do not share 'new' query variables.

    The following remark in the SPARQL document is important: 'If a new
    variable is mentioned in an optional block (as mbox and hpage are
    mentioned in the previous example), that variable can be mentioned in
    that block and can not be mentioned in a subsequent block.'

    What this means is that the various optional blocks do not interfere
    at this level and there is no need for a check whether a binding in a
    subsequent block clashes with an earlier optional block. This method
    checks whether this requirement is fulfilled. Raises a SPARQLError
    exception if it is not (the rest of the algorithm relies on this, so
    checking it is a good idea...)

    @param pattern: graph pattern
    @type pattern: L{GraphPattern<rdflib.sparql.GraphPattern>}
    @param optionals: graph pattern
    @type optionals: L{GraphPattern<rdflib.sparql.GraphPattern>}
    @raise SPARQLError: if the requirement is not fulfilled
    """
    for index, opt in enumerate(optionals):
        for var in opt.unbounds:
            if var in pattern.unbounds:
                # fine: an optional query variable may also appear in the
                # main pattern
                continue
            for earlier in optionals[:index]:
                if var in earlier.unbounds:
                    # the variable is not in the main pattern (handled
                    # above) but is bound in a previous optional: error!
                    raise SPARQLError(
                        "%s is an illegal query string, it appear in a previous OPTIONAL clause" % var)
def select(self, selection, distinct=True, limit=None, orderBy=None, orderAscend=None, offset=0):
    """
    Run a selection on the query.

    @param selection: Either a single query string, or an array or tuple
        thereof.
    @param distinct: if True, identical results are filtered out
    @type distinct: Boolean
    @param limit: if set to an integer value, the first 'limit' number of
        results are returned; all of them otherwise
    @type limit: non negative integer
    @param orderBy: either a function or a list of strings (corresponding
        to variables in the query). If None, no sorting occurs on the
        results. If the parameter is a function, it must take two
        dictionary arguments (the binding dictionaries), return -1, 0, and
        1, corresponding to smaller, equal, and greater, respectively.
    @param orderAscend: if not None, then an array of booleans of the same
        length as orderBy, True for ascending and False for descending.
        If None, an ascending order is used.
    @param offset: the starting point of return values in the array of
        results. Obviously, this parameter makes real sense if some sort
        of order is defined.
    @return: selection results
    @rtype: list of tuples
    @raise SPARQLError: invalid selection, limit, or offset argument
    """
    def _uniquefyList(lst):
        """Return a copy of the list with possible duplicate elements
        taken out. Used to post-process the outcome of the query.

        @param lst: input list
        @return: result list
        """
        if len(lst) <= 1:
            return lst
        else:
            # must be careful! Using the quick method of Sets destroys the
            # order. Ie, if this was ordered, then a slower but more
            # secure method should be used
            if orderBy != None:
                retval = []
                for i in xrange(0, len(lst)):
                    v = lst[i]
                    skip = False
                    for w in retval:
                        if w == v:
                            skip = True
                            break
                    if not skip:
                        retval.append(v)
                return retval
            else:
                return list(sets.Set(lst))

    # Select may be a single query string, or an array/tuple thereof
    selectionF = _variablesToArray(selection, "selection")

    if type(offset) is not types.IntType or offset < 0:
        raise SPARQLError("'offset' argument is invalid")
    if limit != None:
        if type(limit) is not types.IntType or limit < 0:
            # BUGFIX: this message used to (mis)name the 'offset' argument
            raise SPARQLError("'limit' argument is invalid")

    if orderBy != None:
        results = self._orderedSelect(selectionF, orderBy, orderAscend)
    else:
        if self.parent1 != None and self.parent2 != None:
            results = self.parent1.select(
                selectionF) + self.parent2.select(selectionF)
        else:
            # remember: _processResult turns the expansion results (an
            # array of dictionaries) into an array of tuples in the right,
            # original order
            results = _processResults(selectionF,
                                      self.top.returnResult(selectionF))
    if distinct:
        retval = _uniquefyList(results)
    else:
        retval = results

    if limit != None:
        return retval[offset:limit + offset]
    elif offset > 0:
        return retval[offset:]
    else:
        return retval
def _orderedSelect(self, selection, orderedBy, orderDirection):
    """
    The variant of the selection (as below) that also includes the
    sorting. Because that is much less efficient, this is separated into
    a distinct method that is called only if necessary. It is called
    from the L{select<select>} method.

    Because order can be made on variables that are not part of the
    final selection, this method retrieves a I{full} binding from the
    result to be able to order it (whereas the core L{select<select>}
    method retrieves from the result the selected bindings only). The
    full binding is an array of (binding) dictionaries; the sorting
    sorts this array by comparing the bound variables in the respective
    dictionaries. Once this is done, the final selection is done.

    @param selection: Either a single query string, or an array or tuple
        thereof.
    @param orderedBy: either a function or a list of strings
        (corresponding to variables in the query). If the parameter is a
        function, it must take two dictionary arguments (the binding
        dictionaries), return -1, 0, and 1, corresponding to smaller,
        equal, and greater, respectively.
    @param orderDirection: if not None, then an array of integers of the
        same length as orderedBy, with values the constants ASC or DESC
        (defined in the module). If None, an ascending order is used.
    @return: selection results
    @rtype: list of tuples
    @raise SPARQLError: invalid sorting arguments
    """
    fullBinding = self._getFullBinding()
    if type(orderedBy) is types.FunctionType:
        # the caller supplied a ready-made comparison function
        _sortBinding = orderedBy
    else:
        orderKeys = _variablesToArray(orderedBy, "orderBy")
        # see the direction
        oDir = None  # this is just to fool the interpreter's error message
        if orderDirection is None:
            # default: ascending order on every key
            oDir = [True for i in xrange(0, len(orderKeys))]
        elif type(orderDirection) is types.BooleanType:
            # a single boolean applies to the (single) order key
            oDir = [orderDirection]
        elif type(orderDirection) is not types.ListType and type(
                orderDirection) is not types.TupleType:
            raise SPARQLError("'orderDirection' argument must be a list")
        elif len(orderDirection) != len(orderKeys):
            raise SPARQLError(
                "'orderDirection' must be of an equal length to 'orderBy'")
        else:
            oDir = orderDirection

        def _sortBinding(b1, b2):
            """The sorting method used by the array sort, with return
            values as required by the python run-time. The to-be-compared
            data are dictionaries of bindings.
            """
            for i in xrange(0, len(orderKeys)):
                # each key has to be compared separately. If there is a
                # clear comparison result on that key then we are done,
                # but when that is not the case, the next in line should
                # be used
                key = orderKeys[i]
                direction = oDir[i]
                if key in b1 and key in b2:
                    val1 = b1[key]
                    val2 = b2[key]
                    if val1 != None and val2 != None:
                        if direction:
                            if val1 < val2:
                                return -1
                            elif val1 > val2:
                                return 1
                        else:
                            if val1 > val2:
                                return -1
                            elif val1 < val2:
                                return 1
            # all keys compared equal (or were missing/unbound)
            return 0
    # get the full Binding sorted
    fullBinding.sort(_sortBinding)
    # remember: _processResult turns the expansion results (an array of
    # dictionaries) into an array of tuples in the right, original order
    retval = _processResults(selection, fullBinding)
    return retval
def Evaluate(graph, query, passedBindings={}, DEBUG=False): """ Takes: 1. a rdflib.Graph.Graph instance 2. a SPARQL query instance (parsed using the BisonGen parser) 3. A dictionary of initial variable bindings (varName -> .. rdflib Term .. ) 4. DEBUG Flag Returns a list of tuples - each a binding of the selected variables in query order """ if query.prolog: query.prolog.DEBUG = DEBUG if query.query.dataSets: graphs = [] for dtSet in query.query.dataSets: if isinstance(dtSet, NamedGraph): graphs.append(Graph(graph.store, dtSet)) else: memStore = plugin.get('IOMemory', Store)() memGraph = Graph(memStore) try: memGraph.parse(dtSet, format='n3') except: #Parse as RDF/XML instead memGraph.parse(dtSet) graphs.append(memGraph) tripleStore = sparqlGraph.SPARQLGraph(ReadOnlyGraphAggregate(graphs)) else: tripleStore = sparqlGraph.SPARQLGraph(graph) if isinstance(query.query, SelectQuery) and query.query.variables: query.query.variables = [ convertTerm(item, query.prolog) for item in query.query.variables ] else: query.query.variables = [] #Interpret Graph Graph Patterns as Named Graphs graphGraphPatterns = categorizeGroupGraphPattern( query.query.whereClause.parsedGraphPattern)[0] # rt = categorizeGroupGraphPattern(query.query.whereClause.parsedGraphPattern)[0] # print rt[0], rt[1] if graphGraphPatterns: graphGraphP = graphGraphPatterns[0].nonTripleGraphPattern if isinstance(graphGraphP.name, Variable): if graphGraphP.name in passedBindings: tripleStore = sparqlGraph.SPARQLGraph( Graph(graph.store, passedBindings[graphGraphP.name])) else: #print graphGraphP #raise Exception("Graph Graph Patterns can only be used with variables bound at the top level or a URIRef or BNode term") tripleStore = sparqlGraph.SPARQLGraph( graph, graphVariable=graphGraphP.name) else: graphName = isinstance(graphGraphP.name, Variable) and passedBindings[ graphGraphP.name] or graphGraphP.name graphName = convertTerm(graphName, query.prolog) if isinstance(graph, ReadOnlyGraphAggregate) and not 
graph.store: targetGraph = [ g for g in graph.graphs if g.identifier == graphName ] assert len(targetGraph) == 1 targetGraph = targetGraph[0] else: targetGraph = Graph(graph.store, graphName) tripleStore = sparqlGraph.SPARQLGraph(targetGraph) gp = reorderGroupGraphPattern(query.query.whereClause.parsedGraphPattern) validateGroupGraphPattern(gp) basicPatterns, optionalPatterns = sparqlPSetup(gp, query.prolog) if DEBUG: print "## Select Variables ##\n", query.query.variables print "## Patterns ##\n", basicPatterns print "## OptionalPatterns ##\n", optionalPatterns result = queryObject(tripleStore, basicPatterns, optionalPatterns, passedBindings) if result == None: # generate some proper output for the exception :-) msg = "Errors in the patterns, no valid query object generated; " msg += ("pattern:\n%s\netc..." % basicPatterns[0]) raise SPARQLError(msg) if isinstance(query.query, AskQuery): return result.ask() elif isinstance(query.query, SelectQuery): orderBy = None orderAsc = None if query.query.solutionModifier.orderClause: orderBy = [] orderAsc = [] for orderCond in query.query.solutionModifier.orderClause: # is it a variable? 
if isinstance(orderCond, Variable): orderBy.append(orderCond) orderAsc.append(ASCENDING_ORDER) # is it another expression, only variables are supported else: expr = orderCond.expression assert isinstance( expr, Variable ), "Support for ORDER BY with anything other than a variable is not supported: %s" % expr orderBy.append(expr) orderAsc.append(orderCond.order == ASCENDING_ORDER) limit = query.query.solutionModifier.limitClause and int( query.query.solutionModifier.limitClause) or None offset = query.query.solutionModifier.offsetClause and int( query.query.solutionModifier.offsetClause) or 0 return result.select(query.query.variables, query.query.distinct, limit, orderBy, orderAsc, offset), _variablesToArray( query.query.variables, "selection"), result._getAllVariables( ), orderBy, query.query.distinct else: raise NotImplemented(CONSTRUCT_NOT_SUPPORTED, repr(query))