Beispiel #1
0
def SipStrategy(query,
                sipCollection,
                factGraph,
                derivedPreds,
                bindings={},
                processedRules=None,
                network=None,
                debug=False,
                buildProof=False,
                memoizeMemory=None,
                proofLevel=1):
    """
    Accordingly, we define a sip-strategy for computing the answers to a query
    expressed using a set of Datalog rules, and a set of sips, one for each
    adornment of a rule head, as follows...

    Each evaluation uses memoization (via Python decorators) but also relies on well-formed
    rewrites for using semi-naive bottom up method over large SPARQL data.

    """
    memoizeMemory = memoizeMemory and memoizeMemory or {}
    queryLiteral = BuildUnitermFromTuple(query)
    processedRules = processedRules and processedRules or set()
    if bindings:
        # There are bindings.  Apply them to the terms in the query
        queryLiteral.ground(bindings)

    if debug:
        print("%sSolving" % ('\t' * proofLevel), queryLiteral, bindings)
    # Only consider ground triple pattern isomorphism with matching bindings
    goalRDFStatement = queryLiteral.toRDFTuple()

    if queryLiteral in memoizeMemory:
        if debug:
            print("%sReturning previously calculated results for " %
                  ('\t' * proofLevel), queryLiteral)
        for answers in memoizeMemory[queryLiteral]:
            yield answers
    elif AlphaNode(goalRDFStatement).alphaNetworkHash(
        True,
        skolemTerms=list(bindings.values())) in \
        [AlphaNode(r.toRDFTuple()).alphaNetworkHash(True,
                                                    skolemTerms=list(bindings.values()))
            for r in processedRules
         if AdornLiteral(goalRDFStatement).adornment ==
         r.adornment]:
        if debug:
            print("%s Goal already processed..." %
                  ('\t' * proofLevel))
    else:
        isGround = literalIsGround(queryLiteral)
        if buildProof:
            ns = NodeSet(goalRDFStatement,
                         network=network, identifier=BNode())
        else:
            ns = None
        # adornedProgram = factGraph.adornedProgram
        queryPred = GetOp(queryLiteral)
        if sipCollection is None:
            rules = []
        else:
            # For every rule head matching the query, we invoke the rule,
            # thus determining an adornment, and selecting a sip to follow
            rules = sipCollection.headToRule.get(queryPred, set())
            if None in sipCollection.headToRule:
                # If there are second order rules, we add them
                # since they are a 'wildcard'
                rules.update(sipCollection.headToRule[None])

        # maintained list of rules that haven't been processed before and
        # match the query
        validRules = []

        # each subquery contains values for the bound arguments that are passed
        # through the sip arcs entering the node corresponding to that literal. For
        # each subquery generated, there is a set of answers.
        answers = []

        # variableMapping = {}

        # Some TBox queries can be 'joined' together into SPARQL queries against
        # 'base' predicates via an RDF dataset
        # These atomic concept inclusion axioms can be evaluated together
        # using a disjunctive operator at the body of a horn clause
        # where each item is a query of the form uniPredicate(?X):
        # Or( uniPredicate1(?X1), uniPredicate2(?X), uniPredicate3(?X), ..)
        # In this way massive, conjunctive joins can be 'mediated'
        # between the stated facts and the top-down solver
        @parameterizedPredicate([i for i in derivedPreds])
        def IsAtomicInclusionAxiomRHS(rule, dPreds):
            """
            This is an atomic inclusion axiom with
            a variable (or bound) RHS:  uniPred(?ENTITY)
            """
            bodyList = list(iterCondition(rule.formula.body))
            body = first(bodyList)
            return GetOp(body) not in dPreds and \
                len(bodyList) == 1 and \
                body.op == RDF.type

        atomicInclusionAxioms = list(filter(
            IsAtomicInclusionAxiomRHS, rules))
        if atomicInclusionAxioms and len(atomicInclusionAxioms) > 1:
            if debug:
                print("\tCombining atomic inclusion axioms: ")
                pprint(atomicInclusionAxioms, sys.stderr)
            if buildProof:
                factStep = InferenceStep(ns, source='some RDF graph')
                ns.steps.append(factStep)

            axioms = [rule.formula.body
                      for rule in atomicInclusionAxioms]

            # attempt to exaustively apply any available substitutions
            # and determine if query if fully ground
            vars = [v for v in GetArgs(queryLiteral,
                                       secondOrder=True)
                    if isinstance(v, Variable)]
            openVars, axioms, _bindings = \
                normalizeBindingsAndQuery(vars,
                                          bindings,
                                          axioms)
            if openVars:
                # mappings = {}
                # See if we need to do any variable mappings from the query literals
                # to the literals in the applicable rules
                query, rt = EDBQuery(axioms,
                                     factGraph,
                                     openVars,
                                     _bindings).evaluate(debug,
                                                         symmAtomicInclusion=True)
                if buildProof:
                    # FIXME: subquery undefined
                    factStep.groundQuery = subquery
                for ans in rt:
                    if buildProof:
                        factStep.bindings.update(ans)
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (prepMemiozedAns(ans), ns))
                    yield ans, ns
            else:
                # All the relevant derivations have been explored and the result
                # is a ground query we can directly execute against the facts
                if buildProof:
                    factStep.bindings.update(bindings)
                query, rt = EDBQuery(axioms,
                                     factGraph,
                                     _bindings).evaluate(debug,
                                                         symmAtomicInclusion=True)
                if buildProof:
                    # FIXME: subquery undefined
                    factStep.groundQuery = subquery
                memoizeMemory.setdefault(queryLiteral, set()).add(
                    (prepMemiozedAns(rt), ns))
                yield rt, ns
            rules = filter(
                lambda i: not IsAtomicInclusionAxiomRHS(i), rules)
        for rule in rules:
            # An exception is the special predicate ph; it is treated as a base
            # predicate and the tuples in it are those supplied for qb by
            # unification.
            headBindings = getBindingsFromLiteral(
                goalRDFStatement, rule.formula.head)
            # comboBindings = dict([(k, v) for k, v in itertools.chain(
            #                                           bindings.items(),
            #                                           headBindings.items())])
            varMap = rule.formula.head.getVarMapping(queryLiteral)
            if headBindings and\
                [term for term in rule.formula.head.getDistinguishedVariables(True)
                 if varMap.get(term, term) not in headBindings]:
                continue
            # subQueryAnswers = []
            # dontStop = True
            # projectedBindings = comboBindings.copy()
            if debug:
                print("%sProcessing rule" %
                      ('\t' * proofLevel), rule.formula)
                if debug and sipCollection:
                    print(
                        "Sideways Information Passing (sip) graph for %s: " % queryLiteral)
                    print(sipCollection.serialize(format='n3'))
                    for sip in SIPRepresentation(sipCollection):
                        print(sip)
            try:
                # Invoke the rule
                if buildProof:
                    step = InferenceStep(ns, rule.formula)
                else:
                    step = None
                for rt, step in\
                    invokeRule([headBindings],
                               iter(iterCondition(rule.formula.body)),
                               rule.sip,
                               (proofLevel + 1,
                                memoizeMemory,
                                sipCollection,
                                factGraph,
                                derivedPreds,
                                processedRules.union([
                                    AdornLiteral(query)])),
                               step=step,
                               debug=debug):
                    if rt:
                        if isinstance(rt, dict):
                            # We received a mapping and must rewrite it via
                            # correlation between the variables in the rule head
                            # and the variables in the original query (after applying
                            # bindings)
                            varMap = rule.formula.head.getVarMapping(
                                queryLiteral)
                            if varMap:
                                rt = MakeImmutableDict(
                                    refactorMapping(varMap, rt))
                            if buildProof:
                                step.bindings = rt
                        else:
                            if buildProof:
                                step.bindings = headBindings
                        validRules.append(rule)
                        if buildProof:
                            ns.steps.append(step)
                        if isGround:
                            yield True, ns
                        else:
                            memoizeMemory.setdefault(queryLiteral, set()).add(
                                (prepMemiozedAns(rt),
                                 ns))
                            yield rt, ns

            except RuleFailure:
                # Clean up failed antecedents
                if buildProof:
                    if ns in step.antecedents:
                        step.antecedents.remove(ns)
        if not validRules:
            # No rules matching, query factGraph for answers
            successful = False
            if buildProof:
                factStep = InferenceStep(ns, source='some RDF graph')
                ns.steps.append(factStep)
            if not isGround:
                subquery, rt = EDBQuery([queryLiteral],
                                        factGraph,
                                        [v for v in GetArgs(queryLiteral,
                                                            secondOrder=True)
                                         if isinstance(v, Variable)],

                                        bindings).evaluate(debug)
                if buildProof:
                    factStep.groundQuery = subquery
                for ans in rt:
                    successful = True
                    if buildProof:
                        factStep.bindings.update(ans)
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (prepMemiozedAns(ans),
                         ns))
                    yield ans, ns
                if not successful and queryPred not in derivedPreds:
                    # Open query didn't return any results and the predicate
                    # is ostensibly marked as derived predicate, so we have
                    # failed
                    memoizeMemory.setdefault(
                        queryLiteral, set()).add((False, ns))
                    yield False, ns
            else:
                # All the relevant derivations have been explored and the result
                # is a ground query we can directly execute against the facts
                if buildProof:
                    factStep.bindings.update(bindings)

                subquery, rt = EDBQuery([queryLiteral],
                                        factGraph,
                                        bindings).evaluate(debug)
                if buildProof:
                    factStep.groundQuery = subquery
                memoizeMemory.setdefault(queryLiteral, set()).add(
                    (prepMemiozedAns(rt),
                     ns))
                yield rt, ns
Beispiel #2
0
def SipStrategy(query,
                sipCollection,
                factGraph,
                derivedPreds,
                bindings={},
                processedRules=None,
                network=None,
                debug=False,
                buildProof=False,
                memoizeMemory=None,
                proofLevel=1):
    """
    Accordingly, we define a sip-strategy for computing the answers to a query
    expressed using a set of Datalog rules, and a set of sips, one for each
    adornment of a rule head, as follows...

    Each evaluation uses memoization (via Python decorators) but also relies on well-formed
    rewrites for using semi-naive bottom up method over large SPARQL data.

    """
    memoizeMemory = memoizeMemory and memoizeMemory or {}
    queryLiteral = BuildUnitermFromTuple(query)
    processedRules = processedRules and processedRules or set()
    if bindings:
        #There are bindings.  Apply them to the terms in the query
        queryLiteral.ground(bindings)

    if debug:
        print("%sSolving" % ('\t' * proofLevel), queryLiteral, bindings)
    # Only consider ground triple pattern isomorphism with matching bindings
    goalRDFStatement = queryLiteral.toRDFTuple()

    if queryLiteral in memoizeMemory:
        if debug:
            print("%sReturning previously calculated results for " % \
                        ('\t' * proofLevel), queryLiteral)
        for answers in memoizeMemory[queryLiteral]:
            yield answers
    elif AlphaNode(goalRDFStatement).alphaNetworkHash(
                                      True,
                                      skolemTerms=list(bindings.values())) in \
        [AlphaNode(r.toRDFTuple()).alphaNetworkHash(True,
                                                    skolemTerms=list(bindings.values()))
            for r in processedRules
                if AdornLiteral(goalRDFStatement).adornment == \
                   r.adornment]:
        if debug:
            print("%s Goal already processed..." % \
                ('\t' * proofLevel))
    else:
        isGround = literalIsGround(queryLiteral)
        if buildProof:
            ns = NodeSet(goalRDFStatement, network=network, identifier=BNode())
        else:
            ns = None
        # adornedProgram = factGraph.adornedProgram
        queryPred = GetOp(queryLiteral)
        if sipCollection is None:
            rules = []
        else:
            #For every rule head matching the query, we invoke the rule,
            #thus determining an adornment, and selecting a sip to follow
            rules = sipCollection.headToRule.get(queryPred, set())
            if None in sipCollection.headToRule:
                #If there are second order rules, we add them
                #since they are a 'wildcard'
                rules.update(sipCollection.headToRule[None])

        #maintained list of rules that haven't been processed before and
        #match the query
        validRules = []

        #each subquery contains values for the bound arguments that are passed
        #through the sip arcs entering the node corresponding to that literal. For
        #each subquery generated, there is a set of answers.
        answers = []

        # variableMapping = {}

        #Some TBox queries can be 'joined' together into SPARQL queries against
        #'base' predicates via an RDF dataset
        #These atomic concept inclusion axioms can be evaluated together
        #using a disjunctive operator at the body of a horn clause
        #where each item is a query of the form uniPredicate(?X):
        #Or( uniPredicate1(?X1), uniPredicate2(?X), uniPredicate3(?X), ..)
        #In this way massive, conjunctive joins can be 'mediated'
        #between the stated facts and the top-down solver
        @parameterizedPredicate([i for i in derivedPreds])
        def IsAtomicInclusionAxiomRHS(rule, dPreds):
            """
            This is an atomic inclusion axiom with
            a variable (or bound) RHS:  uniPred(?ENTITY)
            """
            bodyList = list(iterCondition(rule.formula.body))
            body = first(bodyList)
            return GetOp(body) not in dPreds and \
                   len(bodyList) == 1 and \
                   body.op == RDF.type

        atomicInclusionAxioms = list(filter(IsAtomicInclusionAxiomRHS, rules))
        if atomicInclusionAxioms and len(atomicInclusionAxioms) > 1:
            if debug:
                print("\tCombining atomic inclusion axioms: ")
                pprint(atomicInclusionAxioms, sys.stderr)
            if buildProof:
                factStep = InferenceStep(ns, source='some RDF graph')
                ns.steps.append(factStep)

            axioms = [rule.formula.body for rule in atomicInclusionAxioms]

            #attempt to exaustively apply any available substitutions
            #and determine if query if fully ground
            vars = [
                v for v in GetArgs(queryLiteral, secondOrder=True)
                if isinstance(v, Variable)
            ]
            openVars, axioms, _bindings = \
                    normalizeBindingsAndQuery(vars,
                                              bindings,
                                              axioms)
            if openVars:
                # mappings = {}
                #See if we need to do any variable mappings from the query literals
                #to the literals in the applicable rules
                query, rt = EDBQuery(axioms, factGraph, openVars,
                                     _bindings).evaluate(
                                         debug, symmAtomicInclusion=True)
                if buildProof:
                    factStep.groundQuery = subquery
                for ans in rt:
                    if buildProof:
                        factStep.bindings.update(ans)
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (prepMemiozedAns(ans), ns))
                    yield ans, ns
            else:
                #All the relevant derivations have been explored and the result
                #is a ground query we can directly execute against the facts
                if buildProof:
                    factStep.bindings.update(bindings)
                query, rt = EDBQuery(axioms, factGraph, _bindings).evaluate(
                    debug, symmAtomicInclusion=True)
                if buildProof:
                    factStep.groundQuery = subquery
                memoizeMemory.setdefault(queryLiteral, set()).add(
                    (prepMemiozedAns(rt), ns))
                yield rt, ns
            rules = filter(lambda i: not IsAtomicInclusionAxiomRHS(i), rules)
        for rule in rules:
            #An exception is the special predicate ph; it is treated as a base
            #predicate and the tuples in it are those supplied for qb by unification.
            headBindings = getBindingsFromLiteral(goalRDFStatement,
                                                  rule.formula.head)
            # comboBindings = dict([(k, v) for k, v in itertools.chain(
            #                                           bindings.items(),
            #                                           headBindings.items())])
            varMap = rule.formula.head.getVarMapping(queryLiteral)
            if headBindings and\
                [term for term in rule.formula.head.getDistinguishedVariables(True)
                        if varMap.get(term, term) not in headBindings]:
                continue
            # subQueryAnswers = []
            # dontStop = True
            # projectedBindings = comboBindings.copy()
            if debug:
                print("%sProcessing rule" % \
                            ('\t' * proofLevel), rule.formula)
                if debug and sipCollection:
                    print("Sideways Information Passing (sip) graph for %s: " %
                          queryLiteral)
                    print(sipCollection.serialize(format='n3'))
                    for sip in SIPRepresentation(sipCollection):
                        print(sip)
            try:
                # Invoke the rule
                if buildProof:
                    step = InferenceStep(ns, rule.formula)
                else:
                    step = None
                for rt, step in\
                  invokeRule([headBindings],
                              iter(iterCondition(rule.formula.body)),
                              rule.sip,
                              (proofLevel + 1,
                               memoizeMemory,
                               sipCollection,
                               factGraph,
                               derivedPreds,
                               processedRules.union([
                                 AdornLiteral(query)])),
                              step=step,
                              debug=debug):
                    if rt:
                        if isinstance(rt, dict):
                            #We received a mapping and must rewrite it via
                            #correlation between the variables in the rule head
                            #and the variables in the original query (after applying
                            #bindings)
                            varMap = rule.formula.head.getVarMapping(
                                queryLiteral)
                            if varMap:
                                rt = MakeImmutableDict(
                                    refactorMapping(varMap, rt))
                            if buildProof:
                                step.bindings = rt
                        else:
                            if buildProof:
                                step.bindings = headBindings
                        validRules.append(rule)
                        if buildProof:
                            ns.steps.append(step)
                        if isGround:
                            yield True, ns
                        else:
                            memoizeMemory.setdefault(queryLiteral, set()).add(
                                (prepMemiozedAns(rt), ns))
                            yield rt, ns

            except RuleFailure:
                # Clean up failed antecedents
                if buildProof:
                    if ns in step.antecedents:
                        step.antecedents.remove(ns)
        if not validRules:
            #No rules matching, query factGraph for answers
            successful = False
            if buildProof:
                factStep = InferenceStep(ns, source='some RDF graph')
                ns.steps.append(factStep)
            if not isGround:
                subquery, rt = EDBQuery([queryLiteral], factGraph, [
                    v for v in GetArgs(queryLiteral, secondOrder=True)
                    if isinstance(v, Variable)
                ], bindings).evaluate(debug)
                if buildProof:
                    factStep.groundQuery = subquery
                for ans in rt:
                    successful = True
                    if buildProof:
                        factStep.bindings.update(ans)
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (prepMemiozedAns(ans), ns))
                    yield ans, ns
                if not successful and queryPred not in derivedPreds:
                    #Open query didn't return any results and the predicate
                    #is ostensibly marked as derived predicate, so we have failed
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (False, ns))
                    yield False, ns
            else:
                #All the relevant derivations have been explored and the result
                #is a ground query we can directly execute against the facts
                if buildProof:
                    factStep.bindings.update(bindings)

                subquery, rt = EDBQuery([queryLiteral], factGraph,
                                        bindings).evaluate(debug)
                if buildProof:
                    factStep.groundQuery = subquery
                memoizeMemory.setdefault(queryLiteral, set()).add(
                    (prepMemiozedAns(rt), ns))
                yield rt, ns