def SipStrategy(query,
                sipCollection,
                factGraph,
                derivedPreds,
                bindings=None,
                processedRules=None,
                network=None,
                debug=False,
                buildProof=False,
                memoizeMemory=None,
                proofLevel=1):
    """
    Top-down evaluation of ``query`` following sideways information
    passing (sip) strategies; a generator of ``(answer, ns)`` pairs.

    "Accordingly, we define a sip-strategy for computing the answers to a
    query expressed using a set of Datalog rules, and a set of sips, one for
    each adornment of a rule head, as follows..."

    Evaluation order:

    1. If the (grounded) query literal is already a key of ``memoizeMemory``
       the stored pairs are replayed.
    2. If an equivalent goal (same alpha-network hash and adornment) is in
       ``processedRules`` nothing is yielded.
    3. Otherwise every rule registered for the query predicate in
       ``sipCollection.headToRule`` (plus the ``None`` "wildcard" entry) is
       invoked through ``invokeRule``; if no rule produced an answer the
       query is evaluated directly against ``factGraph`` via ``EDBQuery``.

    Parameters:
        query: tuple handed to ``BuildUnitermFromTuple`` (presumably an RDF
            triple pattern -- confirm against callers).
        sipCollection: object exposing ``headToRule`` (and ``serialize`` for
            debug output), or ``None`` when there are no rules.
        factGraph: graph the ``EDBQuery`` instances are evaluated against.
        derivedPreds: iterable of derived predicates.
        bindings: mapping applied to the query literal before solving.
        processedRules: set of adorned goals already being processed.
        network: passed to ``NodeSet`` when ``buildProof`` is true.
        debug: print tracing output.
        buildProof: build ``NodeSet`` / ``InferenceStep`` proof objects.
        memoizeMemory: dict shared between recursive invocations, mapping
            query literals to sets of ``(answer, ns)`` pairs.
        proofLevel: recursion depth, used for debug indentation.

    Yields:
        ``(answer, ns)`` where ``answer`` is a solution mapping, ``True`` /
        ``False``, or whatever ``EDBQuery.evaluate`` returned for a ground
        query, and ``ns`` is the proof ``NodeSet`` (``None`` unless
        ``buildProof``).
    """
    if bindings is None:
        bindings = {}
    # Compare against None: an *empty* dict handed down by a caller must be
    # kept (not replaced by a fresh one) or answers memoized here would never
    # be visible to the caller or to sibling invocations.
    if memoizeMemory is None:
        memoizeMemory = {}
    # processedRules is never mutated below (only .union() is used), so
    # substituting a fresh set for an empty one is harmless.
    processedRules = processedRules or set()
    queryLiteral = BuildUnitermFromTuple(query)
    if bindings:
        # There are bindings. Apply them to the terms in the query
        queryLiteral.ground(bindings)

    if debug:
        print("%sSolving" % ('\t' * proofLevel), queryLiteral, bindings)
    # Only consider ground triple pattern isomorphism with matching bindings
    goalRDFStatement = queryLiteral.toRDFTuple()

    if queryLiteral in memoizeMemory:
        if debug:
            print("%sReturning previously calculated results for " %
                  ('\t' * proofLevel), queryLiteral)
        # Iterate over a snapshot: the consumer may resume other generators
        # that add answers for this literal while we are suspended.
        for answers in list(memoizeMemory[queryLiteral]):
            yield answers
    elif AlphaNode(goalRDFStatement).alphaNetworkHash(
            True, skolemTerms=list(bindings.values())) in [
                AlphaNode(r.toRDFTuple()).alphaNetworkHash(
                    True, skolemTerms=list(bindings.values()))
                for r in processedRules
                if AdornLiteral(goalRDFStatement).adornment == r.adornment]:
        if debug:
            print("%s Goal already processed..." % ('\t' * proofLevel))
    else:
        isGround = literalIsGround(queryLiteral)
        if buildProof:
            ns = NodeSet(goalRDFStatement, network=network,
                         identifier=BNode())
        else:
            ns = None
        queryPred = GetOp(queryLiteral)
        if sipCollection is None:
            rules = []
        else:
            # For every rule head matching the query, we invoke the rule,
            # thus determining an adornment, and selecting a sip to follow.
            # Work on a copy so the index held by sipCollection is not
            # modified by the update below.
            rules = set(sipCollection.headToRule.get(queryPred, set()))
            if None in sipCollection.headToRule:
                # If there are second order rules, we add them
                # since they are a 'wildcard'
                rules.update(sipCollection.headToRule[None])

        # maintained list of rules that haven't been processed before and
        # match the query
        validRules = []

        # Some TBox queries can be 'joined' together into SPARQL queries
        # against 'base' predicates via an RDF dataset.
        # These atomic concept inclusion axioms can be evaluated together
        # using a disjunctive operator at the body of a horn clause
        # where each item is a query of the form uniPredicate(?X):
        # Or( uniPredicate1(?X1), uniPredicate2(?X), uniPredicate3(?X), ..)
        # In this way massive, conjunctive joins can be 'mediated'
        # between the stated facts and the top-down solver
        @parameterizedPredicate(list(derivedPreds))
        def IsAtomicInclusionAxiomRHS(rule, dPreds):
            """
            This is an atomic inclusion axiom with
            a variable (or bound) RHS: uniPred(?ENTITY)
            """
            bodyList = list(iterCondition(rule.formula.body))
            body = first(bodyList)
            return GetOp(body) not in dPreds and \
                len(bodyList) == 1 and \
                body.op == RDF.type

        atomicInclusionAxioms = list(filter(IsAtomicInclusionAxiomRHS, rules))
        if len(atomicInclusionAxioms) > 1:
            if debug:
                print("\tCombining atomic inclusion axioms: ")
                pprint(atomicInclusionAxioms, sys.stderr)
            if buildProof:
                factStep = InferenceStep(ns, source='some RDF graph')
                ns.steps.append(factStep)

            axioms = [rule.formula.body for rule in atomicInclusionAxioms]

            # attempt to exhaustively apply any available substitutions
            # and determine if the query is fully ground
            queryVars = [
                v for v in GetArgs(queryLiteral, secondOrder=True)
                if isinstance(v, Variable)
            ]
            openVars, axioms, _bindings = \
                normalizeBindingsAndQuery(queryVars, bindings, axioms)
            if openVars:
                # See if we need to do any variable mappings from the query
                # literals to the literals in the applicable rules.
                # The result is bound to `subquery` (not `query`): the
                # `query` parameter is still needed below for
                # AdornLiteral(query), and `subquery` is what the proof step
                # records.
                subquery, rt = EDBQuery(
                    axioms, factGraph, openVars, _bindings).evaluate(
                        debug, symmAtomicInclusion=True)
                if buildProof:
                    factStep.groundQuery = subquery
                for ans in rt:
                    if buildProof:
                        factStep.bindings.update(ans)
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (prepMemiozedAns(ans), ns))
                    yield ans, ns
            else:
                # All the relevant derivations have been explored and the
                # result is a ground query we can directly execute against
                # the facts
                if buildProof:
                    factStep.bindings.update(bindings)
                subquery, rt = EDBQuery(
                    axioms, factGraph, _bindings).evaluate(
                        debug, symmAtomicInclusion=True)
                if buildProof:
                    factStep.groundQuery = subquery
                memoizeMemory.setdefault(queryLiteral, set()).add(
                    (prepMemiozedAns(rt), ns))
                yield rt, ns
            # The combined axioms have been answered above; only the
            # remaining rules are invoked individually.
            rules = [r for r in rules if not IsAtomicInclusionAxiomRHS(r)]

        for rule in rules:
            # An exception is the special predicate ph; it is treated as a
            # base predicate and the tuples in it are those supplied for qb
            # by unification.
            headBindings = getBindingsFromLiteral(goalRDFStatement,
                                                  rule.formula.head)
            varMap = rule.formula.head.getVarMapping(queryLiteral)
            # Skip the rule when head bindings exist but some distinguished
            # head variable (after mapping) is not among them.
            if headBindings and any(
                    varMap.get(term, term) not in headBindings
                    for term in
                    rule.formula.head.getDistinguishedVariables(True)):
                continue
            if debug:
                print("%sProcessing rule" % ('\t' * proofLevel),
                      rule.formula)
                if sipCollection:
                    print("Sideways Information Passing (sip) graph for %s: "
                          % queryLiteral)
                    print(sipCollection.serialize(format='n3'))
                    for sip in SIPRepresentation(sipCollection):
                        print(sip)
            try:
                # Invoke the rule
                if buildProof:
                    step = InferenceStep(ns, rule.formula)
                else:
                    step = None
                # NOTE: `step` is rebound on every iteration to the step
                # handed back by invokeRule.
                for rt, step in invokeRule(
                        [headBindings],
                        iter(iterCondition(rule.formula.body)),
                        rule.sip,
                        (proofLevel + 1,
                         memoizeMemory,
                         sipCollection,
                         factGraph,
                         derivedPreds,
                         processedRules.union([AdornLiteral(query)])),
                        step=step,
                        debug=debug):
                    if not rt:
                        continue
                    if isinstance(rt, dict):
                        # We received a mapping and must rewrite it via
                        # correlation between the variables in the rule head
                        # and the variables in the original query (after
                        # applying bindings)
                        varMap = rule.formula.head.getVarMapping(
                            queryLiteral)
                        if varMap:
                            rt = MakeImmutableDict(
                                refactorMapping(varMap, rt))
                        if buildProof:
                            step.bindings = rt
                    elif buildProof:
                        step.bindings = headBindings
                    validRules.append(rule)
                    if buildProof:
                        ns.steps.append(step)
                    if isGround:
                        yield True, ns
                    else:
                        memoizeMemory.setdefault(queryLiteral, set()).add(
                            (prepMemiozedAns(rt), ns))
                        yield rt, ns
            except RuleFailure:
                # Clean up failed antecedents
                if buildProof:
                    if ns in step.antecedents:
                        step.antecedents.remove(ns)

        if not validRules:
            # No rules matching, query factGraph for answers
            successful = False
            if buildProof:
                factStep = InferenceStep(ns, source='some RDF graph')
                ns.steps.append(factStep)
            if not isGround:
                subquery, rt = EDBQuery(
                    [queryLiteral],
                    factGraph,
                    [v for v in GetArgs(queryLiteral, secondOrder=True)
                     if isinstance(v, Variable)],
                    bindings).evaluate(debug)
                if buildProof:
                    factStep.groundQuery = subquery
                for ans in rt:
                    successful = True
                    if buildProof:
                        factStep.bindings.update(ans)
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (prepMemiozedAns(ans), ns))
                    yield ans, ns
                if not successful and queryPred not in derivedPreds:
                    # Open query didn't return any results and the predicate
                    # is not among derivedPreds, so we have failed.
                    # NOTE(review): the original comment said "ostensibly
                    # marked as derived predicate", which contradicts the
                    # `not in` test -- confirm which one is intended.
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (False, ns))
                    yield False, ns
            else:
                # All the relevant derivations have been explored and the
                # result is a ground query we can directly execute against
                # the facts
                if buildProof:
                    factStep.bindings.update(bindings)
                subquery, rt = EDBQuery([queryLiteral],
                                        factGraph,
                                        bindings).evaluate(debug)
                if buildProof:
                    factStep.groundQuery = subquery
                memoizeMemory.setdefault(queryLiteral, set()).add(
                    (prepMemiozedAns(rt), ns))
                yield rt, ns
def sparql_query(self,
                 queryString,
                 queryObj,
                 graph,
                 dataSetBase,
                 extensionFunctions,
                 initBindings=None,
                 initNs=None,
                 DEBUG=False):
    """
    The default 'native' SPARQL implementation is based on sparql-p's
    expansion trees layered on top of the read-only RDF APIs of the
    underlying store.

    ASK queries whose expression (as returned by ``self.isaBaseQuery``) is
    a basic graph pattern are answered directly: patterns over base
    predicates are evaluated together with one ``EDBQuery`` against
    ``self.edb``, and each pattern over a derived predicate is solved with
    ``self.invokeDecisionProcedure``. Every other query is delegated to
    ``TopEvaluate``.

    Parameters:
        queryString: unused here.
        queryObj: parsed query; ``queryObj.query`` is tested for
            ``AskQuery``.
        graph, dataSetBase, extensionFunctions: forwarded to
            ``TopEvaluate``.
        initBindings: initial bindings forwarded to ``TopEvaluate``
            (defaults to an empty dict).
        initNs: unused here.
        DEBUG: forwarded to the EDB query evaluation. The rest of the
            method consults ``self.DEBUG`` instead.

    Returns:
        An instance of the 'SPARQLQueryResult' plugin wrapping either the
        boolean ASK answer or the ``TopEvaluate`` result.
    """
    from rdflib.sparql.Algebra import TopEvaluate
    from rdflib.QueryResult import QueryResult
    from rdflib import plugin
    from rdflib.sparql.bison.Query import AskQuery

    if initBindings is None:
        initBindings = {}
    if initNs is None:
        initNs = {}

    _expr = self.isaBaseQuery(None, queryObj)
    if isinstance(queryObj.query, AskQuery) and _expr.name == 'BGP':
        # This is a ground, BGP, involving IDB and can be solved directly
        # using top-down decision procedure.
        # First separate out conjunct into EDB and IDB predicates
        # (solving the former first)
        from FuXi.SPARQL import EDBQuery
        groundConjunct = []
        derivedConjunct = []
        for s, p, o, func in _expr.patterns:
            if self.derivedPredicateFromTriple((s, p, o)) is None:
                groundConjunct.append(BuildUnitermFromTuple((s, p, o)))
            else:
                derivedConjunct.append(BuildUnitermFromTuple((s, p, o)))

        askResult = True
        if groundConjunct:
            baseEDBQuery = EDBQuery(groundConjunct, self.edb)
            subQuery, ans = baseEDBQuery.evaluate(DEBUG)
            assert isinstance(ans, bool), ans
            askResult = ans

        if askResult:
            for derivedLiteral in derivedConjunct:
                goal = derivedLiteral.toRDFTuple()
                # Solve ground, derived goal directly
                SetupDDLAndAdornProgram(
                    self.edb,
                    self.idb,
                    [goal],
                    derivedPreds=self.derivedPredicates,
                    ignoreUnboundDPreds=True,
                    hybridPreds2Replace=self.hybridPredicates)

                if self.hybridPredicates:
                    lit = BuildUnitermFromTuple(goal)
                    op = GetOp(lit)
                    if op in self.hybridPredicates:
                        lit.setOperator(URIRef(op + u'_derived'))
                        goal = lit.toRDFTuple()

                sipCollection = PrepareSipCollection(
                    self.edb.adornedProgram)
                if self.DEBUG and sipCollection:
                    for sip in SIPRepresentation(sipCollection):
                        print(sip)
                    pprint(list(self.edb.adornedProgram))
                elif self.DEBUG:
                    print("No SIP graph.")

                # Only the first (answer, node) pair matters for ASK. A
                # decision procedure that yields nothing at all is treated
                # as "not proven" instead of failing on tuple unpacking.
                firstAnswer = next(
                    iter(self.invokeDecisionProcedure(goal,
                                                      self.edb,
                                                      {},
                                                      self.DEBUG,
                                                      sipCollection)),
                    None)
                if firstAnswer is None or not firstAnswer[0]:
                    askResult = False
                    break
        return plugin.get('SPARQLQueryResult', QueryResult)(askResult)
    else:
        rt = TopEvaluate(queryObj,
                         graph,
                         initBindings,
                         DEBUG=self.DEBUG,
                         dataSetBase=dataSetBase,
                         extensionFunctions=extensionFunctions)
        return plugin.get('SPARQLQueryResult', QueryResult)(rt)
def MagicSetTransformation(factGraph,
                           rules,
                           GOALS,
                           derivedPreds=None,
                           strictCheck=DDL_STRICTNESS_FALLBACK_DERIVED,
                           noMagic=None,
                           defaultPredicates=None):
    """
    Takes a goal and a ruleset and returns an iterator over the ruleset
    that corresponds to the magic set transformation.

    The program is first adorned with ``SetupDDLAndAdornProgram``. For each
    adorned rule, a magic rule is generated for every adorned body
    predicate whose operator is not listed in ``noMagic``, and a modified
    copy of the rule is produced with the magic predicates inserted in
    front of the predicates they guard (and the head's magic predicate
    first, when the head has bound arguments).

    Parameters:
        factGraph, rules, GOALS, derivedPreds, strictCheck,
        defaultPredicates: forwarded to ``SetupDDLAndAdornProgram``
            (``derivedPreds`` is also forwarded to ``buildMagicBody``).
        noMagic: operators for which no magic predicates are generated.

    Yields:
        Every generated rule whose body is non-empty. When nothing was
        generated, the rules from ``AdditionalRules(factGraph)`` are used.

    Raises:
        NotImplementedError: when a body predicate has more than one
            incoming sip arc (that case only prints diagnostics).
    """
    import warnings

    noMagic = noMagic or []
    magicPredicates = set()
    adornedProgram = SetupDDLAndAdornProgram(
        factGraph,
        rules,
        GOALS,
        derivedPreds=derivedPreds,
        strictCheck=strictCheck,
        defaultPredicates=defaultPredicates)
    newRules = []
    for rule in adornedProgram:
        if rule.isSecondOrder():
            warnings.warn(
                "Second order rule no supported by GMS: %s" % rule,
                RuntimeWarning)

        # body position -> (magic predicate, original predicate)
        magicPositions = {}
        # Generate magic rules
        for idx, pred in enumerate(iterCondition(rule.formula.body)):
            if not isinstance(pred, AdornedUniTerm):
                continue
            # For each rule r in Pad, and for each occurrence of an adorned
            # predicate p a in its body, we generate a magic rule defining
            # magic_p a
            prevPreds = [
                item for _idx, item in enumerate(rule.formula.body)
                if _idx < idx
            ]
            if 'b' not in pred.adornment:
                warnings.warn(
                    "adorned predicate w/out any bound arguments (%s in %s)"
                    % (pred, rule.formula), RuntimeWarning)
            if GetOp(pred) in noMagic:
                continue

            magicPred = pred.makeMagicPred()
            magicPositions[idx] = (magicPred, pred)
            # Only arcs whose label variables all occur in the predicate
            inArcs = [
                (N, x)
                for (N, x) in IncomingSIPArcs(rule.sip,
                                              getOccurrenceId(pred))
                if not set(x).difference(GetArgs(pred))
            ]
            if len(inArcs) > 1:
                # If there are several arcs entering qi, we define the
                # magic rule defining magic_qi in two steps. First,
                # for each arc Nj --> qi with label cj , we define a
                # rule with head label_qi_j(cj ). The body of the rule
                # is the same as the body of the magic rule in the
                # case where there is a single arc entering qi
                # (described above). Then the magic rule is defined as
                # follows. The head is magic_q(0). The body contains
                # label_qi_j(cj) for all j (that is, for all arcs
                # entering qi ).
                #
                # We combine all incoming arcs into a single list of
                # (body) conditions for the magic set.
                # This case is not supported yet: diagnostics are printed
                # and NotImplementedError is raised.
                PrettyPrintRule(rule)
                SIPRepresentation(rule.sip)
                print(pred, magicPred)
                _body = []
                additionalRules = []
                for idxSip, (N, x) in enumerate(inArcs):
                    newPred = pred.clone()
                    SetOp(newPred,
                          URIRef('%s_label_%s' % (newPred.op, idxSip)))
                    ruleBody = And(
                        buildMagicBody(N, prevPreds, rule.formula.head,
                                       derivedPreds))
                    additionalRules.append(
                        Rule(Clause(ruleBody, newPred)))
                    # Each label predicate is one body condition.
                    _body.append(newPred)
                additionalRules.append(
                    Rule(Clause(And(_body), magicPred)))
                newRules.extend(additionalRules)
                for i in additionalRules:
                    print(i)
                raise NotImplementedError()
            else:
                for N, x in inArcs:
                    ruleBody = And(
                        buildMagicBody(N, prevPreds, rule.formula.head,
                                       derivedPreds, noMagic))
                    newRules.append(Rule(Clause(ruleBody, magicPred)))
            magicPredicates.add(magicPred)

        # Modify rules
        # we modify the original rule by inserting
        # occurrences of the magic predicates corresponding
        # to the derived predicates of the body and to the head
        # If there are no bound arguments in the head, we don't modify the
        # rule
        newRule = copy.deepcopy(rule)
        if magicPositions and isinstance(newRule.formula.body, Uniterm):
            # A single-literal body has no list to insert into; wrap it the
            # same way the head case below does.
            # NOTE(review): assumes And([x]).formulae is the mutable list
            # [x] -- confirm against the And implementation.
            newRule.formula.body = And([newRule.formula.body])
        # Insert in ascending body position: every earlier insertion shifts
        # the later positions by one, so the running offset is only correct
        # when positions are visited in sorted order.
        for offset, idx in enumerate(sorted(magicPositions)):
            magicPred, _origPred = magicPositions[idx]
            newRule.formula.body.formulae.insert(idx + offset, magicPred)
        if 'b' in rule.formula.head.adornment and \
                GetOp(rule.formula.head) not in noMagic:
            headMagicPred = rule.formula.head.makeMagicPred()
            if isinstance(newRule.formula.body, Uniterm):
                newRule.formula.body = And(
                    [headMagicPred, newRule.formula.body])
            else:
                newRule.formula.body.formulae.insert(0, headMagicPred)
        newRules.append(newRule)

    if not newRules:
        newRules.extend(AdditionalRules(factGraph))
    for rule in newRules:
        if rule.formula.body:
            yield rule
def conjunctiveSipStrategy(self, goalsRemaining, factGraph, bindings=None):
    """
    Given a conjunctive set of triples, invoke sip-strategy passing
    on intermediate solutions to facilitate 'join' behavior.

    One goal is taken from ``goalsRemaining`` per invocation; the method
    then recurses on the same iterator with the bindings obtained for that
    goal. When no goal is left, the accumulated ``bindings`` are yielded.

    Parameters:
        goalsRemaining: an *iterator* (``next()`` is called on it) over the
            triple patterns still to be solved. It is shared with, and
            consumed by, the recursive calls.
        factGraph: forwarded to ``self.invokeDecisionProcedure``.
        bindings: solution mapping accumulated so far; a falsy value is
            replaced by a new empty dict.

    Yields:
        The bindings that remain once every goal has been consumed.
    """
    bindings = bindings if bindings else {}
    # Only exhaustion of the goal iterator means "conjunction solved".
    # The try is kept this narrow so that a StopIteration escaping from any
    # of the helpers below is not mistaken for a solution.
    try:
        tp = next(goalsRemaining)
    except StopIteration:
        yield bindings
        return

    assert isinstance(bindings, dict)
    dPred = self.derivedPredicateFromTriple(tp)
    if dPred is None:
        # Base (EDB) predicate: evaluate directly against the fact graph
        baseEDBQuery = EDBQuery([BuildUnitermFromTuple(tp)],
                                self.edb,
                                bindings=bindings)
        if self.DEBUG:
            print("Evaluating TP against EDB:%s" % baseEDBQuery.asSPARQL())
        query, rt = baseEDBQuery.evaluate()
        # NOTE(review): every solution is merged into the one `bindings`
        # dict (later solutions overwrite earlier ones, and the dict passed
        # in by the caller is modified) -- confirm this is intended.
        for item in rt:
            bindings.update(item)
        for ansDict in self.conjunctiveSipStrategy(
                goalsRemaining, factGraph, bindings):
            yield ansDict
    else:
        # Derived (IDB) predicate: solve it with the decision procedure
        queryLit = BuildUnitermFromTuple(tp)
        currentOp = GetOp(queryLit)
        queryLit.setOperator(currentOp)
        query = EDBQuery([queryLit], self.edb, bindings=bindings)
        if bindings:
            tp = first(query.formulae).toRDFTuple()
        if self.DEBUG:
            print("Goal/Query: ", query.asSPARQL())
        SetupDDLAndAdornProgram(
            self.edb,
            self.idb,
            [tp],
            derivedPreds=self.derivedPredicates,
            ignoreUnboundDPreds=True,
            hybridPreds2Replace=self.hybridPredicates)

        if self.hybridPredicates:
            lit = BuildUnitermFromTuple(tp)
            op = GetOp(lit)
            if op in self.hybridPredicates:
                lit.setOperator(URIRef(op + u'_derived'))
                tp = lit.toRDFTuple()

        sipCollection = PrepareSipCollection(self.edb.adornedProgram)
        if self.DEBUG and sipCollection:
            for sip in SIPRepresentation(sipCollection):
                print(sip)
            pprint(list(self.edb.adornedProgram), sys.stderr)
        elif self.DEBUG:
            print("No SIP graph.")

        for nextAnswer, ns in self.invokeDecisionProcedure(
                tp, factGraph, bindings, self.DEBUG, sipCollection):
            nonGroundGoal = isinstance(nextAnswer, dict)
            if not (nonGroundGoal or nextAnswer):
                # A ground goal that could not be proved: no solution here
                continue
            # Either we received bindings from top-down evaluation
            # or we (successfully) proved a ground query
            if nonGroundGoal:
                # Received solutions to 'open' query, merge with given
                # bindings and continue
                rt = mergeMappings1To2(bindings, nextAnswer)
            else:
                # Attempt to prove a ground query, return the response.
                # NOTE(review): this hands a non-dict answer to the
                # recursive call as its bindings -- confirm intended.
                rt = nextAnswer
            for ansDict in self.conjunctiveSipStrategy(
                    goalsRemaining, factGraph, rt):
                yield ansDict