# ---- Example 1 ----
def _variablesToArray(variables, name=''):
    """Normalize a selection into a list of query-variable strings.

    Turn a single string/Variable, or a (possibly mixed) list or tuple of
    those, into a flat list of query strings.

    @param variables: a string, a unicode, or a Variable (Unbound), or a
    list/tuple of those (can be mixed). As a special case, the single
    string "*" yields None, which corresponds to the wildcard in SPARQL.
    @param name: the argument name, used in error messages only
    @return: a list of strings, or None for the "*" wildcard
    @raise SPARQLError: if 'variables' (or any element of it) is of an
    unsupported type
    """
    if isinstance(variables, basestring):
        if variables == "*":
            # SPARQL wildcard: signalled to the caller as None
            return None
        else:
            return [variables]
    elif isinstance(variables, Unbound):
        return [variables.name]
    elif isinstance(variables, (list, tuple)):
        retval = []
        for s in variables:
            if isinstance(s, basestring):
                retval.append(s)
            elif isinstance(s, Unbound):
                retval.append(s.name)
            else:
                raise SPARQLError(
                    "illegal type in '%s'; must be a string, unicode, or a Variable"
                    % name)
        return retval
    else:
        raise SPARQLError(
            "'%s' argument must be a string, a Variable, or a list of those" %
            name)
# ---- Example 2 ----
def query(graph, selection, patterns, optionalPatterns=None, initialBindings=None):
    """
    A shorthand for the creation of a L{Query} instance, returning
    the result of a L{Query.select} right away. Good for most of
    the usage, when no more action (clustering, etc) is required.

    @param graph: the graph (triple store) to run the query against
    @param selection: a list or tuple with the selection criteria,
    or a single string. Each entry is a string that begins with a "?".
    @param patterns: either a
    L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>}
    instance or a list of instances thereof. Each pattern in the
    list represents an 'OR' (or 'UNION') branch in SPARQL.
    @param optionalPatterns: either a
    L{GraphPattern<rdflib.sparql.graphPattern.GraphPattern>}
    instance or a list of instances thereof, or None for none. Each
    element in the 'patterns' parameter is combined with each of the
    optional patterns and the results are concatenated. (None is used
    as the default instead of a shared mutable '[]'.)
    @param initialBindings: dictionary of initial variable bindings,
    or None for none. (None is used as the default instead of a
    shared mutable '{}'.)
    @return: list of query results
    @rtype: list of tuples
    @raise SPARQLError: if no valid query object could be generated
    """
    # replace the None sentinels: mutable default arguments ([]/{}) are
    # shared across calls in Python and are a well-known pitfall
    if optionalPatterns is None:
        optionalPatterns = []
    if initialBindings is None:
        initialBindings = {}
    result = queryObject(graph, patterns, optionalPatterns, initialBindings)
    if result is None:
        # generate some proper output for the exception :-)
        msg = "Errors in the patterns, no valid query object generated; "
        if isinstance(patterns, GraphPattern):
            msg += ("pattern:\n%s" % patterns)
        else:
            msg += ("pattern:\n%s\netc..." % patterns[0])
        raise SPARQLError(msg)
    return result.select(selection)
# ---- Example 3 ----
 def checkArg(arg, error):
     """Normalize a GraphPattern argument into a list of GraphPatterns.

     @param arg: None, a GraphPattern instance, or a list/tuple of
     GraphPattern instances
     @param error: the argument name, used in error messages only
     @return: a (possibly empty) list of GraphPattern instances
     @raise SPARQLError: if 'arg' (or any element of it) is of the
     wrong type
     """
     if arg is None:
         # no patterns at all: normalized to an empty list
         return []
     elif isinstance(arg, GraphPattern):
         return [arg]
     elif isinstance(arg, (list, tuple)):
         for p in arg:
             if not isinstance(p, GraphPattern):
                 raise SPARQLError(
                     "'%s' argument must be a GraphPattern or a list of those"
                     % error)
         return arg
     else:
         raise SPARQLError(
             "'%s' argument must be a GraphPattern or a list of those" %
             error)
# ---- Example 4 ----
    def _generatePattern(self, tupl):
        """
        Normalize one pattern tuple into the internal 4-element form.
        Possible type literals are converted to real literals on the
        fly. Each tuple must contain either 3 elements (an RDF triple
        pattern) or four, where the fourth element is a per-pattern
        constraint (filter). (The general constraint of SPARQL can be
        optimized by assigning a constraint to a specific pattern;
        because it stops the graph expansion, its usage might be much
        more optimal than the 'global' constraint.)

        @param tupl: either a three or four element tuple
        @return: a four element tuple (subject, predicate, object, filter)
        @raise SPARQLError: if 'tupl' is not a tuple of 3 or 4 elements
        """
        if not isinstance(tupl, tuple):
            raise SPARQLError(
                "illegal argument, pattern must be a tuple, got %s" %
                type(tupl))
        if len(tupl) != 3 and len(tupl) != 4:
            raise SPARQLError(
                "illegal argument, pattern must be a tuple of 3 or 4 element, got %s"
                % len(tupl))
        if len(tupl) == 3:
            (s, p, o) = tupl
            f = None  # no per-pattern constraint supplied
        else:
            (s, p, o, f) = tupl
        final = []
        for c in (s, p, o):
            if _isResQuest(c):
                # a query variable: record it as unbound (once)
                if c not in self.unbounds:
                    self.unbounds.append(c)
                final.append(c)
            elif isinstance(c, BNode):
                # BNode name management is handled by the SPARQL parser,
                # so blank nodes are passed through untouched
                final.append(c)
            else:
                final.append(_createResource(c))
        final.append(f)
        return tuple(final)
# ---- Example 5 ----
 def __init__(self, patterns=None):
     """
     @param patterns: an initial list of graph pattern tuples, a single
     pattern tuple, or None for no initial patterns. (None replaces the
     former mutable '[]' default, which is shared across calls in
     Python and is a well-known pitfall.)
     @raise SPARQLError: if 'patterns' is neither a tuple nor a list
     """
     # normalized (s, p, o, f) pattern tuples
     self.patterns = []
     # global filter constraint functions
     self.constraints = []
     # query variables seen so far
     self.unbounds = []
     # blank node management
     self.bnodes = {}
     if patterns is None:
         patterns = []
     # exact type checks (rather than isinstance) deliberately keep the
     # original dispatch: a tuple is a *single* pattern, a list is many
     if type(patterns) == list:
         self.addPatterns(patterns)
     elif type(patterns) == tuple:
         self.addPattern(patterns)
     else:
         raise SPARQLError(
             "illegal argument, pattern must be a tuple or a list of tuples"
         )
# ---- Example 6 ----
    def addConstraint(self, func):
        """
        Add a global filter constraint to the graph pattern. 'func'
        must be a callable with a single input parameter (a dictionary)
        returning a boolean. This method is I{added} to previously
        added methods, ie, I{all} methods must return True to accept a
        binding.

        (Previously only plain functions - types.FunctionType - were
        accepted, which rejected bound methods, built-ins and
        functools.partial objects; any callable is now allowed as a
        backward-compatible generalization.)

        @param func: filter function (any callable)
        @raise SPARQLError: if 'func' is not callable
        """
        if callable(func):
            self.constraints.append(func)
        else:
            raise SPARQLError(
                "illegal argument, constraint must be a function type, got %s"
                % type(func))
# ---- Example 7 ----
def _checkOptionals(pattern, optionals):
    """
    The following remark in the SPARQL document is important:

    'If a new variable is mentioned in an optional block (as mbox and
    hpage are mentioned in the previous example), that variable can be
    mentioned in that block and can not be mentioned in a subsequent
    block.'

    What this means is that the various optional blocks do not
    interefere at this level and there is no need for a check whether
    a binding in a subsequent block clashes with an earlier optional
    block.

    This method checks whether this requirement is fulfilled. Raises a
    SPARQLError exception if it is not (the rest of the algorithm
    relies on this, so checking it is a good idea...)

    @param pattern: graph pattern
    @type pattern: L{GraphPattern<rdflib.sparql.GraphPattern>}
    @param optionals: graph pattern
    @type optionals: L{GraphPattern<rdflib.sparql.GraphPattern>}
    @raise SPARQLError: if the requirement is not fulfilled
    """
    for i in xrange(0, len(optionals)):
        for c in optionals[i].unbounds:
            if c in pattern.unbounds:
                # this is fine, an optional query variable can appear in the main pattern, too
                continue
            if i > 0:
                for j in xrange(0, i):
                    if c in optionals[j].unbounds:
                        # This means that:
                        #   - the variable is not in the main pattern (because the previous if would have taken care of it)
                        #   - the variable is in the previous optional: ie, Error!
                        raise SPARQLError(
                            "%s is an illegal query string, it appear in a previous OPTIONAL clause"
                            % c)
# ---- Example 8 ----
    def select(self,
               selection,
               distinct=True,
               limit=None,
               orderBy=None,
               orderAscend=None,
               offset=0):
        """
        Run a selection on the query.

        @param selection: Either a single query string, or an array or tuple thereof.
        @param distinct: if True, identical results are filtered out
        @type distinct: Boolean
        @param limit: if set to an integer value, the first 'limit' number of results are returned; all of them otherwise
        @type limit: non negative integer
        @param orderBy: either a function or a list of strings (corresponding to variables in the query). If None, no sorting occurs
        on the results. If the parameter is a function, it must take two dictionary arguments (the binding dictionaries), return
        -1, 0, and 1, corresponding to smaller, equal, and greater, respectively.
        @param orderAscend: if not None, then an array of booleans of the same length as orderBy, True for ascending and False
        for descending. If None, an ascending order is used.
        @param offset: the starting point of return values in the array of results. Obviously, this parameter makes real sense if
        some sort of order is defined.
        @return: selection results
        @rtype: list of tuples
        @raise SPARQLError: invalid selection, offset, or limit argument
        """
        def _uniquefyList(lst):
            """Return a copy of the list with duplicate elements removed;
            used to post-process the outcome of the query.
            @param lst: input list
            @return: result list
            """
            if len(lst) <= 1:
                return lst
            else:
                # must be careful! The quick set-based method destroys the
                # order, so when the result is ordered a slower but
                # order-preserving scan must be used
                if orderBy is not None:
                    retval = []
                    for v in lst:
                        if v not in retval:
                            retval.append(v)
                    return retval
                else:
                    return list(set(lst))

        # Select may be a single query string, or an array/tuple thereof
        selectionF = _variablesToArray(selection, "selection")

        # exact type check (not isinstance) intentionally rejects bools
        if type(offset) is not int or offset < 0:
            raise SPARQLError("'offset' argument is invalid")

        if limit is not None:
            if type(limit) is not int or limit < 0:
                # the message used to (incorrectly) say 'offset' here
                raise SPARQLError("'limit' argument is invalid")

        if orderBy is not None:
            results = self._orderedSelect(selectionF, orderBy, orderAscend)
        else:
            if self.parent1 is not None and self.parent2 is not None:
                results = self.parent1.select(
                    selectionF) + self.parent2.select(selectionF)
            else:
                # remember: _processResults turns the expansion results (an
                # array of dictionaries) into an array of tuples in the
                # right, original order
                results = _processResults(selectionF,
                                          self.top.returnResult(selectionF))
        if distinct:
            retval = _uniquefyList(results)
        else:
            retval = results

        if limit is not None:
            return retval[offset:limit + offset]
        elif offset > 0:
            return retval[offset:]
        else:
            return retval
# ---- Example 9 ----
    def _orderedSelect(self, selection, orderedBy, orderDirection):
        """
        The variant of the selection that also sorts the results. Because
        sorting is much less efficient, it is separated into this
        distinct method, called from L{select<select>} only when an
        ordering was actually requested.

        Because the order can be based on variables that are not part of
        the final selection, this method retrieves a I{full} binding from
        the result to be able to order it (whereas the core
        L{select<select>} method retrieves only the selected bindings).
        The full binding is an array of (binding) dictionaries; the sort
        compares the bound variables in the respective dictionaries. Once
        this is done, the final selection is performed.

        @param selection: Either a single query string, or an array or
        tuple thereof.
        @param orderedBy: either a comparison function or a list of
        strings (corresponding to variables in the query). If a function,
        it must take two binding-dictionary arguments and return -1, 0,
        or 1 for smaller, equal, and greater, respectively.
        @param orderDirection: if not None, a single boolean or an array
        of booleans of the same length as 'orderedBy': True for ascending
        and False for descending. If None, an ascending order is used.
        @return: selection results
        @rtype: list of tuples
        @raise SPARQLError: invalid sorting arguments
        """
        fullBinding = self._getFullBinding()
        if type(orderedBy) is types.FunctionType:
            # the caller supplied the cmp-style comparison function directly
            _sortBinding = orderedBy
        else:
            orderKeys = _variablesToArray(orderedBy, "orderBy")
            # normalize the sort direction into one boolean per order key
            oDir = None  # this is just to fool the interpreter's error message
            if orderDirection is None:
                oDir = [True for i in xrange(0, len(orderKeys))]
            elif type(orderDirection) is types.BooleanType:
                oDir = [orderDirection]
            elif type(orderDirection) is not types.ListType and type(
                    orderDirection) is not types.TupleType:
                raise SPARQLError("'orderDirection' argument must be a list")
            elif len(orderDirection) != len(orderKeys):
                raise SPARQLError(
                    "'orderDirection' must be of an equal length to 'orderBy'")
            else:
                oDir = orderDirection

            def _sortBinding(b1, b2):
                """Cmp-style comparison (as required by the Python 2
                list.sort) between two binding dictionaries.
                """
                for i in xrange(0, len(orderKeys)):
                    # each key is compared separately: the first key giving
                    # a decisive result wins; otherwise the next key in
                    # line is consulted
                    key = orderKeys[i]
                    direction = oDir[i]
                    if key in b1 and key in b2:
                        val1 = b1[key]
                        val2 = b2[key]
                        if val1 != None and val2 != None:
                            if direction:
                                if val1 < val2: return -1
                                elif val1 > val2: return 1
                            else:
                                if val1 > val2: return -1
                                elif val1 < val2: return 1
                return 0

        # get the full binding sorted (cmp-function sort, Python 2 only)
        fullBinding.sort(_sortBinding)
        # remember: _processResults turns the expansion results (an array of
        # dictionaries) into an array of tuples in the right, original order
        retval = _processResults(selection, fullBinding)
        return retval
# ---- Example 10 ----
def Evaluate(graph, query, passedBindings={}, DEBUG=False):
    """
    Takes:
        1. a rdflib.Graph.Graph instance 
        2. a SPARQL query instance (parsed using the BisonGen parser)
        3. A dictionary of initial variable bindings (varName -> .. rdflib Term .. )
        4. DEBUG Flag

    Returns a list of tuples - each a binding of the selected variables in query order
    """
    if query.prolog:
        query.prolog.DEBUG = DEBUG
    if query.query.dataSets:
        graphs = []
        for dtSet in query.query.dataSets:
            if isinstance(dtSet, NamedGraph):
                graphs.append(Graph(graph.store, dtSet))
            else:
                memStore = plugin.get('IOMemory', Store)()
                memGraph = Graph(memStore)
                try:
                    memGraph.parse(dtSet, format='n3')
                except:
                    #Parse as RDF/XML instead
                    memGraph.parse(dtSet)
                graphs.append(memGraph)
        tripleStore = sparqlGraph.SPARQLGraph(ReadOnlyGraphAggregate(graphs))
    else:
        tripleStore = sparqlGraph.SPARQLGraph(graph)

    if isinstance(query.query, SelectQuery) and query.query.variables:
        query.query.variables = [
            convertTerm(item, query.prolog) for item in query.query.variables
        ]
    else:
        query.query.variables = []

    #Interpret Graph Graph Patterns as Named Graphs
    graphGraphPatterns = categorizeGroupGraphPattern(
        query.query.whereClause.parsedGraphPattern)[0]
    #    rt = categorizeGroupGraphPattern(query.query.whereClause.parsedGraphPattern)[0]
    #    print rt[0], rt[1]
    if graphGraphPatterns:
        graphGraphP = graphGraphPatterns[0].nonTripleGraphPattern
        if isinstance(graphGraphP.name, Variable):
            if graphGraphP.name in passedBindings:
                tripleStore = sparqlGraph.SPARQLGraph(
                    Graph(graph.store, passedBindings[graphGraphP.name]))
            else:
                #print graphGraphP
                #raise Exception("Graph Graph Patterns can only be used with variables bound at the top level or a URIRef or BNode term")
                tripleStore = sparqlGraph.SPARQLGraph(
                    graph, graphVariable=graphGraphP.name)
        else:
            graphName = isinstance(graphGraphP.name,
                                   Variable) and passedBindings[
                                       graphGraphP.name] or graphGraphP.name
            graphName = convertTerm(graphName, query.prolog)
            if isinstance(graph, ReadOnlyGraphAggregate) and not graph.store:
                targetGraph = [
                    g for g in graph.graphs if g.identifier == graphName
                ]
                assert len(targetGraph) == 1
                targetGraph = targetGraph[0]
            else:
                targetGraph = Graph(graph.store, graphName)
            tripleStore = sparqlGraph.SPARQLGraph(targetGraph)

    gp = reorderGroupGraphPattern(query.query.whereClause.parsedGraphPattern)
    validateGroupGraphPattern(gp)
    basicPatterns, optionalPatterns = sparqlPSetup(gp, query.prolog)

    if DEBUG:
        print "## Select Variables ##\n", query.query.variables
        print "## Patterns ##\n", basicPatterns
        print "## OptionalPatterns ##\n", optionalPatterns

    result = queryObject(tripleStore, basicPatterns, optionalPatterns,
                         passedBindings)
    if result == None:
        # generate some proper output for the exception :-)
        msg = "Errors in the patterns, no valid query object generated; "
        msg += ("pattern:\n%s\netc..." % basicPatterns[0])
        raise SPARQLError(msg)

    if isinstance(query.query, AskQuery):
        return result.ask()

    elif isinstance(query.query, SelectQuery):
        orderBy = None
        orderAsc = None
        if query.query.solutionModifier.orderClause:
            orderBy = []
            orderAsc = []
            for orderCond in query.query.solutionModifier.orderClause:
                # is it a variable?
                if isinstance(orderCond, Variable):
                    orderBy.append(orderCond)
                    orderAsc.append(ASCENDING_ORDER)
                # is it another expression, only variables are supported
                else:
                    expr = orderCond.expression
                    assert isinstance(
                        expr, Variable
                    ), "Support for ORDER BY with anything other than a variable is not supported: %s" % expr
                    orderBy.append(expr)
                    orderAsc.append(orderCond.order == ASCENDING_ORDER)

        limit = query.query.solutionModifier.limitClause and int(
            query.query.solutionModifier.limitClause) or None

        offset = query.query.solutionModifier.offsetClause and int(
            query.query.solutionModifier.offsetClause) or 0
        return result.select(query.query.variables, query.query.distinct,
                             limit, orderBy, orderAsc,
                             offset), _variablesToArray(
                                 query.query.variables,
                                 "selection"), result._getAllVariables(
                                 ), orderBy, query.query.distinct
    else:
        raise NotImplemented(CONSTRUCT_NOT_SUPPORTED, repr(query))