def parseN3Logic(self, src):
    """
    Parse the N3 document ``src`` and compile every rule it contains into
    this network.

    The document is read into a fresh ``N3RuleStore`` that is configured with
    the filters of ``self.ruleStore``.  Each resulting rule is handed to
    ``self.buildNetwork`` and recorded in ``self.rules``; afterwards the
    ``alphaNodes`` / ``alphaBuiltInNodes`` lists are rebuilt from
    ``self.nodes``.

    Raises ``AssertionError`` if the parsed document yields no rules.
    """
    n3Store = N3RuleStore(additionalBuiltins=self.ruleStore.filters)
    n3Graph = Graph(n3Store)
    n3Graph.parse(src, format='n3')
    n3Store._finalize()
    assert len(n3Store.rules), "There are no rules passed in!"

    from FuXi.Horn.HornRules import Ruleset
    parsedRules = Ruleset(n3Rules=n3Store.rules, nsMapping=self.nsMap)
    for hornRule in parsedRules:
        clause = hornRule.formula
        self.buildNetwork(iter(clause.body), iter(clause.head), hornRule)
        self.rules.add(hornRule)

    # Two independent isinstance checks: a node is listed in every category
    # it is an instance of, exactly as two separate filters would do.
    plainAlphas = []
    builtInAlphas = []
    for candidate in self.nodes.values():
        if isinstance(candidate, AlphaNode):
            plainAlphas.append(candidate)
        if isinstance(candidate, BuiltInAlphaNode):
            builtInAlphas.append(candidate)
    self.alphaNodes = plainAlphas
    self.alphaBuiltInNodes = builtInAlphas
def NormalizeLPDb(ruleGraph, fact_db):
    """
    For performance reasons, it is good to decompose the database into a set of
    pure base predicates (which can then be stored using a standard DBMS)
    and a set of pure derived predicates Fortunately, such a decomposition
    is always possible, because every database can be rewritten
    ...as database containing only base and derived predicates.

    ``ruleGraph`` is a graph whose ``store`` exposes the parsed ``rules`` and
    the namespace manager ``nsMgr``; ``fact_db`` is the fact graph that is
    rewritten in place.  For every predicate reported by
    ``iter_non_base_non_derived_preds`` the matching facts are moved to a
    ``<pred>_ext`` predicate and a bridging rule is appended to the ruleset,
    which is returned.

    >>> ruleStore,ruleGraph=SetupRuleStore()
    >>> g=ruleGraph.parse(StringIO(PARTITION_LP_DB_PREDICATES),format='n3')
    >>> ruleStore._finalize()
    >>> len(ruleStore.rules)
    1
    >>> factGraph=Graph().parse(StringIO(PARTITION_LP_DB_PREDICATES),format='n3')
    >>> rs=Ruleset(n3Rules=ruleStore.rules,nsMapping=ruleStore.nsMgr)
    >>> for i in rs: print i
    Forall ?Y ?X ?Z ( ex:grandfather(?X ?Y) :- And( ex:father(?X ?Z) ex:parent(?X ?Y) ) )
    >>> len(factGraph)
    4
    >>> print [p for p,iter in iter_non_base_non_derived_preds(rs,factGraph)]
    [rdflib.URIRef('http://doi.acm.org/10.1145/16856.16859#grandfather')]
    """
    # The rule store is reached through the graph; the function receives no
    # separate ``ruleStore`` argument.
    ruleStore = ruleGraph.store
    rs = Ruleset(n3Rules=ruleStore.rules,
                 nsMapping=ruleStore.nsMgr)
    toAdd = []
    for pred, replFacts in iter_non_base_non_derived_preds(rs, fact_db):
        replPred = URIRef(pred + '_ext')
        for s, p, o in replFacts:
            fact_db.remove((s, p, o))
            toAdd.append((s, replPred, o))
        # NOTE(review): the doctest above shows ``pred`` to be a URIRef, which
        # would not carry an ``arg`` attribute -- confirm what
        # iter_non_base_non_derived_preds really yields before relying on
        # the rule construction below.
        head = Uniterm(pred, pred.arg)
        body = Uniterm(replPred, pred.arg)
        newRule = Rule(Clause(body, head),
                       [term for term in pred.arg
                        if isinstance(term, Variable)])
        rs.append(newRule)
    # Re-assert the moved facts under their replacement predicates.  This is
    # deferred until the scan is finished so the fact graph is not extended
    # while it is still being iterated.
    for triple in toAdd:
        fact_db.add(triple)
    return rs
def _load_user_builtins(path):
    """Execute the Python source file at ``path`` and return it as a module."""
    import importlib.util
    from importlib.machinery import SourceFileLoader

    # The legacy ``imp`` module is gone in Python 3.12.  The module is given a
    # private name so it cannot be confused with the standard ``builtins``
    # module, and it is not registered in ``sys.modules``.
    loader = SourceFileLoader('fuxi_user_builtins', path)
    spec = importlib.util.spec_from_loader(loader.name, loader)
    module = importlib.util.module_from_spec(spec)
    loader.exec_module(module)
    return module


def _expand_qname(qname, mapping):
    """Expand ``prefix:local`` into a URIRef using the prefix ``mapping``."""
    # Split once so that a local part containing ':' is kept intact.
    pref, local = qname.split(':', 1)
    return URIRef(mapping[pref] + local)


def _format_elapsed(seconds):
    """Render a duration as seconds, or as milliseconds when it is <= 1s."""
    if seconds > 1:
        return "%s seconds" % seconds
    return "%s milli seconds" % (seconds * 1000)


def main():
    """
    Command-line entry point.

    Parses the options, loads the N3 rule documents (``--rules``) and the fact
    documents given as positional arguments (or binds to a SPARQL endpoint),
    optionally extracts further rules from OWL via DLP, then either

    * prints the rules / a Manchester OWL rendering (``--output``),
    * answers the ``--why`` query with the ``gms`` or ``bfp`` method, or
    * feeds all facts through the RETE network (no ``--why``, ``naive``),

    and finally serializes the inferred triples (or the closure graph when
    ``--closure`` is given) to STDOUT when ``--output`` is an RDF format.

    Invalid option combinations are reported through ``OptionParser.error``,
    which exits with status 2.
    """
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')
    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-naive methods'
             'see --method')
    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples'
             ' along with the original triples. Otherwise '
             '(the default behavior), serialize only the inferred triples')
    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')
    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=[
            'xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif',
            'rif-xml', 'conflict', 'man-owl'
        ],
        help="Serialize the inferred triples and/or original RDF triples to STDOUT "
             "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', "
             "or 'n3') or to print a summary of the conflict set (from the RETE "
             "network) if the value of this option is 'conflict'. If the the "
             " value is 'rif' or 'rif-xml', Then the rules used for inference "
             "will be serialized as RIF. If the value is 'pml' and --why is used, "
             " then the PML RDF statements are serialized. If output is "
             "'proof-graph then a graphviz .dot file of the proof graph is printed. "
             "Finally if the value is 'man-owl', then the RDF facts are assumed "
             "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default")
    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which '
             'classes within the entire OWL/RDF are targetted for serialization'
             '. Can be used more than once')
    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with with --method=bfp to determine whether or not to '
             'peek into the fact graph to identify predicates that are both '
             'derived and base. This is expensive for large fact graphs'
             'and is explicitely not used against SPARQL endpoints')
    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which '
             'properties are serialized / extracted. Can be used more than once')
    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"
             "The default is %default")
    op.add_option(
        '--ddlGraph',
        default=False,
        help="The location of a N3 Data Description document describing the IDB predicates")
    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the RDF document(s) which serve as the initial facts "
             " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', "
             "or 'rdfa'. The default is %default")
    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety. A value of 'loose' "
             " means that unsafe rules will be ignored. A value of 'strict' "
             " will cause a syntax exception upon any unsafe rule. A value of "
             "'none' (the default) does nothing")
    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help='Used with --dlp to add pD semantics ruleset for semantics not covered '
             'by DLP but can be expressed in definite Datalog Logic Programming'
             ' The default is %default')
    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')
    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI). This '
             'can be used more than once')
    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network'
             '. Can be specified more than once')
    # A store_true flag has to default to False, otherwise passing -d can
    # never change anything.
    op.add_option(
        '-d', '--debug',
        action='store_true',
        default=False,
        help='Include debugging output')
    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'],
        help='Used with --why to specify whether to: *not* check if predicates are '
             ' both derived and base (loose), if they are, mark as derived (defaultDerived) '
             'or as base (defaultBase) predicates, else raise an exception (harsh)')
    # The help text lists exactly the accepted choices.
    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for query. '
             'One of: gms, bfp, naive')
    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help='Used with --why to determine whether to fetch all answers or just '
             'the first')
    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultDerived to specify which clashing '
             'predicate will be designated as a base predicate')
    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultBase to specify which clashing '
             'predicate will be designated as a derived predicate')
    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help='Used with --why to explicitely specify a hybrid predicate (in both '
             ' IDB and EDB) ')
    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate shouldnt have its '
             'magic sets calculated')
    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as a filter (entailments do not particpate in network)')
    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from "
             "the rule graph. The default is %default")
    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a "
             "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations")
    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF. The default is %default')
    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help='Indicates that the sole argument is the URI of a SPARQL endpoint to query')
    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The path to an OWL RDF/XML graph to use DLP to extract rules from '
             '(other wise, fact graph(s) are used) ')
    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the OWL RDF/XML graph specified via --ontology. The default is %default")
    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help='The path to an N3 document associating SPARQL FILTER templates to '
             'rule builtins')
    op.add_option(
        '--negation',
        action='store_true',
        default=False,
        help='Extract negative rules?')
    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    # Validate option combinations up front, before any graph is touched.
    if options.sparqlEndpoint and not facts:
        op.error("--sparqlEndpoint requires the endpoint URI as an argument")
    if options.stdin and options.sparqlEndpoint:
        op.error("Cannot use --stdin with --sparqlEndpoint")

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        # Split on the first '=' only: the URI part may itself contain '='.
        pref, sep, nsUri = nsBind.partition('=')
        if not sep:
            op.error("--ns expects PREFIX=URI, got %r" % nsBind)
        nsBinds[pref] = nsUri

    # The user supplied builtins module is loaded once and shared by the rule
    # parser and the rule store below.
    userFuncs = None
    if options.builtins:
        userFuncs = _load_user_builtins(options.builtins)

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from ", fileN)
        if userFuncs is not None:
            rs = HornFromN3(
                fileN, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)
        #ruleGraph.parse(fileN, format='n3')

    ruleSet.nsMapping = nsBinds

    for prefix, uri in nsBinds.items():
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
        if options.imports:
            # Snapshot the import targets first: parsing adds triples to the
            # very graph that is being queried.
            for owlImport in list(
                    factGraph.objects(predicate=OWL_NS.imports)):
                factGraph.parse(owlImport)
                print("Parsed Semantic Web Graph.. ", owlImport)
        if facts:
            for pref, uri in factGraph.namespaces():
                nsBinds[pref] = uri

    if options.stdin:
        factGraph.parse(sys.stdin, format=options.inputFormat)

    #Normalize namespace mappings
    #prune redundant, rdflib-allocated namespace prefix mappings
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    collapsedNs = CollapseDictionary(dict(factGraph.namespaces()))
    for k, v in collapsedNs.items():
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    # NormalFormReduction is needed by --normalForm as well as by --dlp, so
    # it has to be imported before the first of the two uses; importing it
    # only inside the --dlp branch leaves the name unbound here.
    if options.normalForm or options.dlp:
        from FuXi.DLP.DLNormalization import NormalFormReduction

    if options.normalForm:
        NormalFormReduction(factGraph)

    # NOTE(review): workingMemory is only available for local fact graphs;
    # the gms fallback and the naive branch below read it unconditionally --
    # confirm that those paths are never meant to run with --sparqlEndpoint.
    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if userFuncs is not None:
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True,
            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
            for prefix, uri in ontGraph.namespaces():
                nsBinds[prefix] = uri
                namespace_manager.bind(prefix, uri, override=False)
                if options.sparqlEndpoint:
                    factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)

    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                print(Class(_expand_qname(c, mapping)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                print(Property(_expand_qname(p, mapping)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [
                            sc for sc in c.subClassOf if sc.isPrimitive()
                        ]
                        if len(primAnc) > 1:
                            warnings.warn(
                                "Branches of primitive skeleton taxonomy"
                                " should form trees: %s has %s primitive parents: %s"
                                % (c.qname, len(primAnc), primAnc),
                                UserWarning, 1)
                        children = list(c.subSumpteeIds())
                        for child in children:
                            # Identifiers declared disjoint with this child.
                            disjointIds = [
                                dc.identifier
                                for dc in Class(child).disjointWith
                            ]
                            for otherChild in children:
                                if otherChild is child:
                                    continue
                                if otherChild not in disjointIds:
                                    warnings.warn(
                                        "Primitive children (of %s) " % (c.qname) +
                                        "must be mutually disjoint: %s and %s" % (
                                            Class(child).qname,
                                            Class(otherChild).qname),
                                        UserWarning, 1)
                # if not isinstance(c.identifier, BNode):
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds = []
    if options.why:
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                 format='n3')
        templateMap = {
            pred: template
            for pred, _ignore, template in builtinTemplateGraph.triples(
                (None, TEMPLATES.filterTemplate, None))
        }
        factGraph.templateMap = templateMap

        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prologue:
            query.prologue = Prologue(None, [])
            query.prologue.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in nsBinds.items():
                if prefix not in query.prologue.prefixBindings:
                    query.prologue.prefixBindings[prefix] = nsInst
        print("query.prologue", query.prologue)
        print("query.query", query.query)
        print("query.query.whereClause", query.query.whereClause)
        print("query.query.whereClause.parsedGraphPattern",
              query.query.whereClause.parsedGraphPattern)
        goals = [
            (s, p, o)
            for s, p, o, _ctx in ReduceGraphPattern(
                query.query.whereClause.parsedGraphPattern,
                query.prologue).patterns
        ]
        # dPreds=[]# p for s, p, o in goals ]
        # print("goals", goals)
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        # topDownDerivedPreds = []
        defaultDerivedPreds = set()
        mapping = dict(newNsMgr.namespaces())
        defaultBasePreds = [
            _expand_qname(edb, mapping) for edb in options.edb
        ]
        noMagic = [
            _expand_qname(pred, mapping) for pred in options.noMagic
        ]
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds = IdentifyDerivedPredicates(
                ddlGraph, Graph(), ruleSet)
        else:
            for idb in options.idb:
                defaultDerivedPreds.add(_expand_qname(idb, mapping))
            # For rdf:type goals the class (object) is the derived predicate.
            defaultDerivedPreds.update(
                o if p == RDF.type else p for s, p, o in goals)
        # NOTE(review): the flattened source does not show whether this was
        # nested under the else-branch above; explicitly requested hybrid
        # predicates are honoured here whether or not --ddlGraph is given.
        hybridPredicates = [
            _expand_qname(hybrid, mapping)
            for hybrid in options.hybridPredicate
        ]

        if options.method == 'gms':
            for goal in goals:
                goalSeed = AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation)",
                      goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t", factGraph.qname(p))
            for rule in MagicSetTransformation(
                    factGraph,
                    ruleSet,
                    goals,
                    derivedPreds=bottomUpDerivedPreds,
                    strictCheck=nameMap[options.strictness],
                    defaultPredicates=(defaultBasePreds,
                                       defaultDerivedPreds),
                    noMagic=noMagic):
                magicRuleNo += 1
                network.buildNetworkFromClause(rule)
            ruleCount = len(list(ruleSet))
            if ruleCount:
                print("reduction in size of program: %s (%s -> %s clauses)" % (
                    100 - (float(magicRuleNo) / float(ruleCount)) * 100,
                    ruleCount,
                    magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not any(len(rule.sip) for rule in factGraph.adornedProgram):
                warnings.warn(
                    "Using GMS sideways information strategy with no " +
                    "information to pass from query. Falling back to " +
                    "naive method over given facts and rules")
                network.feedFactsToAdd(workingMemory)
            sTimeStr = _format_elapsed(time.time() - start)
            print("Time to calculate closure on working memory: ", sTimeStr)

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            # Reuse the template map computed above rather than parsing the
            # template document a second time.
            builtinDict = (
                dict(templateMap) if options.builtinTemplates else None)
            topDownStore = TopDownSPARQLEntailingStore(
                factGraph.store,
                factGraph,
                idb=ruleSet,
                DEBUG=options.debug,
                derivedPredicates=topDownDPreds,
                templateMap=builtinDict,
                nsBindings=network.nsMap,
                identifyHybridPredicates=options.hybrid,
                hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal) for goal in goals],
            #                         targetGraph)
            # query = queryLiteral.asSPARQL()
            # print("Goal to solve ", query)
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                sTimeStr = _format_elapsed(time.time() - start)
                print("Time to reach answer ground goal answer of %s: %s" % (
                    result.askAnswer[0], sTimeStr))
            else:
                for rt in result:
                    sTimeStr = _format_elapsed(time.time() - start)
                    # NOTE(review): with --firstAnswer the loop stops before
                    # anything is printed -- confirm that this is intended.
                    if options.firstAnswer:
                        break
                    print(
                        "Time to reach answer %s via top-down SPARQL sip strategy: %s"
                        % (rt, sTimeStr))
            if options.output == 'conflict':
                for _network, _goal in topDownStore.queryNetworks:
                    # Report on the per-goal network, not the global one.
                    print(_network, _goal)
                    _network.reportConflictSet(options.debug)
                for query in topDownStore.edbQueries:
                    print(query.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTimeStr = _format_elapsed(time.time() - start)
        print("Time to calculate closure on working memory: ", sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    # NOTE(review): the flattened source does not show whether the filter
    # network was built only in the naive branch; it is built for every
    # method here because --filter is applied unconditionally further down.
    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    # NOTE(review): --method only accepts gms/bfp/naive, so this condition
    # can never hold as written -- confirm the intended method names.
    if options.negation and network.negRules and options.method in [
            'both', 'bottomUp']:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print(
            "Time to calculate stratified, stable model (inferred %s facts): %s"
            % (rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(
            cGraph.serialize(destination=None,
                             format=options.output,
                             base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(
            network.inferredFacts.serialize(destination=None,
                                            format=options.output,
                                            base=None))
def main(): from optparse import OptionParser op = OptionParser( 'usage: %prog [options] factFile1 factFile2 ... factFileN') op.add_option('--why', default=None, help='Specifies the goals to solve for using the non-niave methods' + 'see --method') op.add_option('--closure', action='store_true', default=False, help='Whether or not to serialize the inferred triples' + ' along with the original triples. Otherwise ' + '(the default behavior), serialize only the inferred triples') op.add_option('--imports', action='store_true', default=False, help='Whether or not to follow owl:imports in the fact graph') op.add_option('--output', default='n3', metavar='RDF_FORMAT', choices=['xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml', 'conflict', 'man-owl'], help="Serialize the inferred triples and/or original RDF triples to STDOUT "+ "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', "+ "or 'n3') or to print a summary of the conflict set (from the RETE "+ "network) if the value of this option is 'conflict'. If the the "+ " value is 'rif' or 'rif-xml', Then the rules used for inference "+ "will be serialized as RIF. If the value is 'pml' and --why is used, "+ " then the PML RDF statements are serialized. If output is "+ "'proof-graph then a graphviz .dot file of the proof graph is printed. "+ "Finally if the value is 'man-owl', then the RDF facts are assumed "+ "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default") op.add_option('--class', dest='classes', action='append', default=[], metavar='QNAME', help='Used with --output=man-owl to determine which '+ 'classes within the entire OWL/RDF are targetted for serialization'+ '. Can be used more than once') op.add_option('--hybrid', action='store_true', default=False, help='Used with with --method=bfp to determine whether or not to '+ 'peek into the fact graph to identify predicates that are both '+ 'derived and base. 
This is expensive for large fact graphs'+ 'and is explicitely not used against SPARQL endpoints') op.add_option('--property', action='append', dest='properties', default=[], metavar='QNAME', help='Used with --output=man-owl or --extract to determine which '+ 'properties are serialized / extracted. Can be used more than once') op.add_option('--normalize', action='store_true', default=False, help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"+ "The default is %default") op.add_option('--ddlGraph', default=False, help="The location of a N3 Data Description document describing the IDB predicates") op.add_option('--input-format', default='xml', dest='inputFormat', metavar='RDF_FORMAT', choices=['xml', 'trix', 'n3', 'nt', 'rdfa'], help="The format of the RDF document(s) which serve as the initial facts "+ " for the RETE network. One of 'xml','n3','trix', 'nt', "+ "or 'rdfa'. The default is %default") op.add_option('--safety', default='none', metavar='RULE_SAFETY', choices=['loose', 'strict', 'none'], help="Determines how to handle RIF Core safety. A value of 'loose' "+ " means that unsafe rules will be ignored. A value of 'strict' "+ " will cause a syntax exception upon any unsafe rule. A value of "+ "'none' (the default) does nothing") op.add_option('--pDSemantics', action='store_true', default=False, help='Used with --dlp to add pD semantics ruleset for semantics not covered '+ 'by DLP but can be expressed in definite Datalog Logic Programming'+ ' The default is %default') op.add_option('--stdin', action='store_true', default=False, help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ') op.add_option('--ns', action='append', default=[], metavar="PREFIX=URI", help='Register a namespace binding (QName prefix to a base URI). 
This '+ 'can be used more than once') op.add_option('--rules', default=[], action='append', metavar='PATH_OR_URI', help='The Notation 3 documents to use as rulesets for the RETE network'+ '. Can be specified more than once') op.add_option('-d', '--debug', action='store_true', default=False, help='Include debugging output') op.add_option('--strictness', default='defaultBase', metavar='DDL_STRICTNESS', choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'], help='Used with --why to specify whether to: *not* check if predicates are '+ ' both derived and base (loose), if they are, mark as derived (defaultDerived) '+ 'or as base (defaultBase) predicates, else raise an exception (harsh)') op.add_option('--method', default='naive', metavar='reasoning algorithm', choices=['gms', 'bfp', 'naive'], help='Used with --why to specify how to evaluate answers for query. '+ 'One of: gms,bfp,naive') op.add_option('--firstAnswer', default=False, action='store_true', help='Used with --why to determine whether to fetch all answers or just '+ 'the first') op.add_option('--edb', default=[], action='append', metavar='EXTENSIONAL_DB_PREDICATE_QNAME', help='Used with --why/--strictness=defaultDerived to specify which clashing '+ 'predicate will be designated as a base predicate') op.add_option('--idb', default=[], action='append', metavar='INTENSIONAL_DB_PREDICATE_QNAME', help='Used with --why/--strictness=defaultBase to specify which clashing '+ 'predicate will be designated as a derived predicate') op.add_option('--hybridPredicate', default=[], action='append', metavar='PREDICATE_QNAME', help='Used with --why to explicitely specify a hybrid predicate (in both '+ ' IDB and EDB) ') op.add_option('--noMagic', default=[], action='append', metavar='DB_PREDICATE_QNAME', help='Used with --why to specify that the predicate shouldnt have its '+ 'magic sets calculated') op.add_option('--filter', action='append', default=[], metavar='PATH_OR_URI', help='The Notation 3 documents to use as a 
filter (entailments do not particpate in network)') op.add_option('--ruleFacts', action='store_true', default=False, help="Determines whether or not to attempt to parse initial facts from "+ "the rule graph. The default is %default") op.add_option('--builtins', default=False, metavar='PATH_TO_PYTHON_MODULE', help="The path to a python module with function definitions (and a "+ "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations") op.add_option('--dlp', action='store_true', default=False, help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF. The default is %default') op.add_option('--sparqlEndpoint', action='store_true', default=False, help='Indicates that the sole argument is the URI of a SPARQL endpoint to query') op.add_option('--ontology', action='append', default=[], metavar='PATH_OR_URI', help='The path to an OWL RDF/XML graph to use DLP to extract rules from '+ '(other wise, fact graph(s) are used) ') op.add_option('--ruleFormat', default='n3', dest='ruleFormat', metavar='RULE_FORMAT', choices=['n3', 'rif'], help="The format of the rules to parse ('n3', 'rif'). The default is %default") op.add_option('--ontologyFormat', default='xml', dest='ontologyFormat', metavar='RDF_FORMAT', choices=['xml', 'trix', 'n3', 'nt', 'rdfa'], help="The format of the OWL RDF/XML graph specified via --ontology. 
The default is %default") op.add_option('--builtinTemplates', default=None, metavar='N3_DOC_PATH_OR_URI', help='The path to an N3 document associating SPARQL FILTER templates to '+ 'rule builtins') op.add_option('--negation', action='store_true', default=False, help='Extract negative rules?') op.add_option('--normalForm', action='store_true', default=False, help='Whether or not to reduce DL axioms & LP rules to a normal form') (options, facts) = op.parse_args() nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'} for nsBind in options.ns: pref, nsUri = nsBind.split('=') nsBinds[pref]=nsUri namespace_manager = NamespaceManager(Graph()) if options.sparqlEndpoint: factGraph = Graph(plugin.get('SPARQL', Store)(facts[0])) options.hybrid = False else: factGraph = Graph() ruleSet = Ruleset() for fileN in options.rules: if options.ruleFacts and not options.sparqlEndpoint: factGraph.parse(fileN, format='n3') print("Parsing RDF facts from %s" % fileN) if options.builtins: import imp userFuncs = imp.load_source('builtins', options.builtins) rs = HornFromN3(fileN, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS) nsBinds.update(rs.nsMapping) elif options.ruleFormat == 'rif': try: from FuXi.Horn.RIFCore import RIFCoreParser rif_parser = RIFCoreParser(location=fileN, debug=options.debug) rs = rif_parser.getRuleset() except ImportError: raise Exception( "Missing 3rd party libraries for RIF processing" ) else: rs = HornFromN3(fileN) nsBinds.update(rs.nsMapping) ruleSet.formulae.extend(rs) #ruleGraph.parse(fileN,format='n3') ruleSet.nsMapping = nsBinds for prefix, uri in list(nsBinds.items()): namespace_manager.bind(prefix, uri, override=False) closureDeltaGraph = Graph() closureDeltaGraph.namespace_manager = namespace_manager factGraph.namespace_manager = namespace_manager if not options.sparqlEndpoint: for fileN in facts: factGraph.parse(fileN, format=options.inputFormat) if options.imports: for owlImport in factGraph.objects(predicate=OWL_NS.imports): 
factGraph.parse(owlImport) print("Parsed Semantic Web Graph.. %s" % owlImport) if not options.sparqlEndpoint and facts: for pref, uri in factGraph.namespaces(): nsBinds[pref]=uri if options.stdin: assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint" factGraph.parse(sys.stdin, format=options.inputFormat) #Normalize namespace mappings #prune redundant, rdflib-allocated namespace prefix mappings newNsMgr = NamespaceManager(factGraph) from FuXi.Rete.Util import CollapseDictionary for k, v in list(CollapseDictionary(dict([(k, v) for k, v in factGraph.namespaces()])).items()): newNsMgr.bind(k, v) factGraph.namespace_manager = newNsMgr if options.normalForm: NormalFormReduction(factGraph) if not options.sparqlEndpoint: workingMemory = generateTokenSet(factGraph) if options.builtins: import imp userFuncs = imp.load_source('builtins', options.builtins) rule_store, rule_graph, network = SetupRuleStore( makeNetwork=True, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS) else: rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True) network.inferredFacts = closureDeltaGraph network.nsMap = nsBinds if options.dlp: from FuXi.DLP.DLNormalization import NormalFormReduction if options.ontology: ontGraph = Graph() for fileN in options.ontology: ontGraph.parse(fileN, format=options.ontologyFormat) for prefix, uri in ontGraph.namespaces(): nsBinds[prefix] = uri namespace_manager.bind(prefix, uri, override=False) if options.sparqlEndpoint: factGraph.store.bind(prefix, uri) else: ontGraph=factGraph NormalFormReduction(ontGraph) dlp=network.setupDescriptionLogicProgramming( ontGraph, addPDSemantics=options.pDSemantics, constructNetwork=False, ignoreNegativeStratus=options.negation, safety=safetyNameMap[options.safety]) ruleSet.formulae.extend(dlp) if options.output == 'rif' and not options.why: for rule in ruleSet: print(rule) if options.negation: for nRule in network.negRules: print(nRule) elif options.output == 'man-owl': cGraph = 
network.closureGraph(factGraph, readOnly=False) cGraph.namespace_manager = namespace_manager Individual.factoryGraph = cGraph if options.classes: mapping = dict(namespace_manager.namespaces()) for c in options.classes: pref, uri = c.split(':') print(Class(URIRef(mapping[pref] + uri)).__repr__(True)) elif options.properties: mapping = dict(namespace_manager.namespaces()) for p in options.properties: pref, uri = p.split(':') print(Property(URIRef(mapping[pref] + uri))) else: for p in AllProperties(cGraph): print(p.identifier, first(p.label)) print(repr(p)) for c in AllClasses(cGraph): if options.normalize: if c.isPrimitive(): primAnc = [sc for sc in c.subClassOf if sc.isPrimitive()] if len(primAnc) > 1: warnings.warn("Branches of primitive skeleton taxonomy" + " should form trees: %s has %s primitive parents: %s" % ( c.qname, len(primAnc), primAnc), UserWarning, 1) children = [desc for desc in c.subSumpteeIds()] for child in children: for otherChild in [o for o in children if o is not child]: if not otherChild in [c.identifier for c in Class(child).disjointWith]: # and\ warnings.warn("Primitive children (of %s) " % (c.qname) + "must be mutually disjoint: %s and %s" % ( Class(child).qname, Class(otherChild).qname), UserWarning, 1) # if not isinstance(c.identifier,BNode): print(c.__repr__(True)) if not options.why: #Naive construction of graph for rule in ruleSet: network.buildNetworkFromClause(rule) magicSeeds=[] if options.why: builtinTemplateGraph = Graph() if options.builtinTemplates: builtinTemplateGraph = Graph().parse(options.builtinTemplates, format='n3') factGraph.templateMap = \ dict([(pred, template) for pred, _ignore, template in builtinTemplateGraph.triples( (None, TEMPLATES.filterTemplate, None))]) goals = [] query = ParseSPARQL(options.why) network.nsMap['pml'] = PML network.nsMap['gmp'] = GMP_NS network.nsMap['owl'] = OWL_NS nsBinds.update(network.nsMap) network.nsMap = nsBinds if not query.prolog: query.prolog = Prolog(None, []) 
query.prolog.prefixBindings.update(nsBinds) else: for prefix, nsInst in list(nsBinds.items()): if prefix not in query.prolog.prefixBindings: query.prolog.prefixBindings[prefix] = nsInst goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern( query.query.whereClause.parsedGraphPattern, query.prolog).patterns]) # dPreds=[]# p for s,p,o in goals ] magicRuleNo = 0 bottomUpDerivedPreds = [] # topDownDerivedPreds = [] defaultBasePreds = [] defaultDerivedPreds = set() hybridPredicates = [] mapping = dict(newNsMgr.namespaces()) for edb in options.edb: pref, uri = edb.split(':') defaultBasePreds.append(URIRef(mapping[pref] + uri)) noMagic = [] for pred in options.noMagic: pref, uri = pred.split(':') noMagic.append(URIRef(mapping[pref] + uri)) if options.ddlGraph: ddlGraph = Graph().parse(options.ddlGraph, format='n3') # @TODO: should also get hybrid predicates from DDL graph defaultDerivedPreds=IdentifyDerivedPredicates( ddlGraph, Graph(), ruleSet) else: for idb in options.idb: pref, uri = idb.split(':') defaultDerivedPreds.add(URIRef(mapping[pref] + uri)) defaultDerivedPreds.update( set([p == RDF.type and o or p for s, p, o in goals])) for hybrid in options.hybridPredicate: pref, uri = hybrid.split(':') hybridPredicates.append(URIRef(mapping[pref]+uri)) if options.method == 'gms': for goal in goals: goalSeed=AdornLiteral(goal).makeMagicPred() print("Magic seed fact (used in bottom-up evaluation) %s" % goalSeed) magicSeeds.append(goalSeed.toRDFTuple()) if noMagic: print("Predicates whose magic sets will not be calculated") for p in noMagic: print("\t%s" % factGraph.qname(p)) for rule in MagicSetTransformation( factGraph, ruleSet, goals, derivedPreds=bottomUpDerivedPreds, strictCheck=nameMap[options.strictness], defaultPredicates=(defaultBasePreds, defaultDerivedPreds), noMagic=noMagic): magicRuleNo+=1 network.buildNetworkFromClause(rule) if len(list(ruleSet)): print("reduction in size of program: %s (%s -> %s clauses)" % ( 100 - (float(magicRuleNo) / 
float(len(list(ruleSet))) ) * 100, len(list(ruleSet)), magicRuleNo)) start = time.time() network.feedFactsToAdd(generateTokenSet(magicSeeds)) if not [ rule for rule in factGraph.adornedProgram if len(rule.sip)]: warnings.warn( "Using GMS sideways information strategy with no "+ "information to pass from query. Falling back to "+ "naive method over given facts and rules") network.feedFactsToAdd(workingMemory) sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds"%sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds"%sTime print("Time to calculate closure on working memory: %s" % sTimeStr) if options.output == 'rif': print("Rules used for bottom-up evaluation") if network.rules: for clause in network.rules: print(clause) else: for clause in factGraph.adornedProgram: print(clause) if options.output == 'conflict': network.reportConflictSet() elif options.method == 'bfp': topDownDPreds = defaultDerivedPreds if options.builtinTemplates: builtinTemplateGraph = Graph().parse(options.builtinTemplates, format='n3') builtinDict = dict([(pred, template) for pred, _ignore, template in builtinTemplateGraph.triples( (None, TEMPLATES.filterTemplate, None))]) else: builtinDict = None topDownStore=TopDownSPARQLEntailingStore( factGraph.store, factGraph, idb=ruleSet, DEBUG=options.debug, derivedPredicates=topDownDPreds, templateMap=builtinDict, nsBindings=network.nsMap, identifyHybridPredicates=options.hybrid \ if options.method == 'bfp' else False, hybridPredicates=hybridPredicates) targetGraph = Graph(topDownStore) for pref, nsUri in list(network.nsMap.items()): targetGraph.bind(pref, nsUri) start = time.time() # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal) # for goal in goals], # targetGraph) # query = queryLiteral.asSPARQL() # print >>sys.stderr, "Goal to solve ", query sTime = time.time() - start result = targetGraph.query(options.why, initNs=network.nsMap) if result.askAnswer: sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds"%sTime 
else: sTime = sTime * 1000 sTimeStr = "%s milli seconds"%sTime print("Time to reach answer ground goal answer of %s: %s" % ( result.askAnswer[0], sTimeStr)) else: for rt in result: sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds" % sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds" % sTime if options.firstAnswer: break print( "Time to reach answer %s via top-down SPARQL sip strategy: %s" % ( rt, sTimeStr)) if options.output == 'conflict' and options.method == 'bfp': for _network, _goal in topDownStore.queryNetworks: print(_network, _goal) _network.reportConflictSet(options.debug) for query in topDownStore.edbQueries: print(query.asSPARQL()) elif options.method == 'naive': start = time.time() network.feedFactsToAdd(workingMemory) sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds"%sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds"%sTime print("Time to calculate closure on working memory: %s" % sTimeStr) print(network) if options.output == 'conflict': network.reportConflictSet() for fileN in options.filter: for rule in HornFromN3(fileN): network.buildFilterNetworkFromClause(rule) if options.negation and network.negRules and options.method in ['both', 'bottomUp']: now=time.time() rt=network.calculateStratifiedModel(factGraph) print("Time to calculate stratified, stable model (inferred %s facts): %s" % ( rt, time.time()-now)) if options.filter: print("Applying filter to entailed facts") network.inferredFacts = network.filteredFacts if options.closure \ and options.output in RDF_SERIALIZATION_FORMATS: cGraph = network.closureGraph(factGraph) cGraph.namespace_manager = namespace_manager print(cGraph.serialize(destination=None, format=options.output, base=None)) elif options.output and options.output in RDF_SERIALIZATION_FORMATS: print(network.inferredFacts.serialize(destination=None, format=options.output, base=None))
def __init__(self,
             ruleStore,
             name=None,
             initialWorkingMemory=None,
             inferredTarget=None,
             nsMap=None,
             graphVizOutFile=None,
             dontFinalize=False,
             goal=None):
    """
    Initialise the network's bookkeeping and compile the rules in ``ruleStore``.

    :param ruleStore: rule store whose ``rules`` are compiled into the
        network; ``_finalize()`` is called on it unless ``dontFinalize``.
    :param name: identifier for the network; a fresh ``BNode`` is used
        when it is omitted (or otherwise falsy).
    :param initialWorkingMemory: optional collection of facts fed to the
        network (via ``feedFactsToAdd``) once it has been built.
    :param inferredTarget: graph that receives inferred facts; when
        ``None`` a new ``Graph`` bound to ``nsMap`` is created.
    :param nsMap: prefix -> namespace mapping. Defaults to a *new* empty
        dict per instance (a shared ``{}`` default would be mutated by
        every network that later adds bindings to ``self.nsMap``).
    :param graphVizOutFile: if given, the rendered network is written to
        this path.
    :param dontFinalize: skip ``ruleStore._finalize()``.
    :param goal: stored unchanged on ``self.goal``.
    """
    if nsMap is None:
        nsMap = {}

    self.leanCheck = {}
    self.goal = goal
    self.nsMap = nsMap
    self.name = name if name else BNode()
    self.nodes = {}
    self.alphaPatternHash = {}
    self.ruleSet = set()
    # One bucket per combination of the three '1'/'0' flags.
    for alphaPattern in xcombine(('1', '0'), ('1', '0'), ('1', '0')):
        self.alphaPatternHash[tuple(alphaPattern)] = {}

    if inferredTarget is None:
        self.inferredFacts = Graph()
        namespace_manager = NamespaceManager(self.inferredFacts)
        for prefix, uri in nsMap.items():
            namespace_manager.bind(prefix, uri)
        self.inferredFacts.namespace_manager = namespace_manager
    else:
        self.inferredFacts = inferredTarget

    self.workingMemory = (
        initialWorkingMemory if initialWorkingMemory else set())
    self.proofTracers = {}
    self.terminalNodes = set()
    self.instantiations = {}
    self.ruleStore = ruleStore
    self.justifications = {}
    self.dischargedBindings = {}
    if not dontFinalize:
        self.ruleStore._finalize()
    self.filteredFacts = Graph()

    # 'Universal truths' for a rule set are rules where the LHS is empty.
    # Rather than automatically adding them to the working set, alpha nodes
    # are 'notified' of them, so they can be checked for while performing
    # inter element tests.
    self.universalTruths = []

    # Imported here rather than at module level, as in the original code.
    import warnings
    from FuXi.Horn.HornRules import Ruleset
    self.rules = set()
    self.negRules = set()
    for rule in Ruleset(n3Rules=self.ruleStore.rules,
                        nsMapping=self.nsMap):
        # Warned once per rule that is compiled at construction time.
        warnings.warn(
            "Rules in a network should be built *after* construction via " +
            " self.buildNetworkClause(HornFromN3(n3graph)) for instance",
            DeprecationWarning, 2)
        self.buildNetworkFromClause(rule)

    all_nodes = list(self.nodes.values())
    self.alphaNodes = [
        node for node in all_nodes if isinstance(node, AlphaNode)]
    self.alphaBuiltInNodes = [
        node for node in all_nodes if isinstance(node, BuiltInAlphaNode)]
    self._setupDefaultRules()

    if initialWorkingMemory:
        start = time.time()
        self.feedFactsToAdd(initialWorkingMemory)
        print("Time to calculate closure on working memory: %s m seconds" % (
            (time.time() - start) * 1000))
    if graphVizOutFile:
        print("Writing out RETE network to ", graphVizOutFile)
        renderNetwork(self, nsMap=nsMap).write(graphVizOutFile)
def main():
    """
    Command-line entry point: load rules and facts, run the RETE network
    over them and print the result in the format chosen by ``--output``.

    Options and positional fact files are read from ``sys.argv`` via
    ``optparse``. Timing information and the network summary go to
    STDERR; serialized rules / classes / graphs go to STDOUT.
    """
    from optparse import OptionParser

    def _write_stderr(*parts):
        # Space-separated, newline-terminated diagnostics on STDERR; valid
        # under both the print-statement and print-function dialects.
        sys.stderr.write(" ".join("%s" % (part,) for part in parts) + "\n")

    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')
    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
             ' along with the original triples. Otherwise ' +
             '(the default behavior), serialize only the inferred triples')
    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=['xml', 'TriX', 'n3', 'nt', 'rif', 'rif-xml', 'conflict',
                 'man-owl'],
        help="Serialize the inferred triples and/or original RDF triples to STDOUT " +
             "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', " +
             "or 'n3') or to print a summary of the conflict set (from the RETE " +
             "network) if the value of this option is 'conflict'. If the the " +
             " value is 'rif' or 'rif-xml', Then the rules used for inference " +
             "will be serialized as RIF. Finally if the value is 'man-owl', then " +
             "the RDF facts are assumed to be OWL/RDF and serialized via Manchester OWL " +
             "syntax. The default is %default")
    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
             'classes within the entire OWL/RDF are targetted for serialization' +
             '. Can be used more than once')
    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
             'properties are serialized / extracted. Can be used more than once')
    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]" +
             "The default is %default")
    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the RDF document(s) which serve as the initial facts " +
             " for the RETE network. One of 'xml','n3','trix', 'nt', " +
             "or 'rdfa'. The default is %default")
    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help='Used with --dlp to add pD semantics ruleset for semantics not covered ' +
             'by DLP but can be expressed in definite Datalog Logic Programming' +
             ' The default is %default')
    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')
    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI). This ' +
             'can be used more than once')
    # NOTE: ``default`` used to be passed twice here, which is a
    # SyntaxError ("keyword argument repeated").
    op.add_option(
        '--rules',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network' +
             '. Can be specified more than once')
    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from " +
             "the rule graph. The default is %default")
    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF. The default is %default')
    (options, facts) = op.parse_args()

    # Namespace bindings: built-in default, then --ns, then rule documents.
    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        pref, nsUri = nsBind.split('=')
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts:
            factGraph.parse(fileN, format='n3')
            _write_stderr("Parsing RDF facts from ", fileN)
        rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)

    ruleSet.nsMapping = nsBinds

    for prefix, uri in nsBinds.items():
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    for fileN in facts:
        factGraph.parse(fileN, format=options.inputFormat)

    if options.stdin:
        factGraph.parse(sys.stdin, format=options.inputFormat)

    workingMemory = generateTokenSet(factGraph)

    rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        dlp = setupDescriptionLogicProgramming(
            factGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False)
        ruleSet.formulae.extend(dlp)

    if options.output == 'rif':
        for rule in ruleSet:
            print(rule)
    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for qname in options.classes:
                pref, uri = qname.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for qname in options.properties:
                pref, uri = qname.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier)
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [sc for sc in c.subClassOf
                                   if sc.isPrimitive()]
                        if len(primAnc) > 1:
                            warnings.warn(
                                "Branches of primitive skeleton taxonomy" +
                                " should form trees: %s has %s primitive parents: %s" % (
                                    c.qname, len(primAnc), primAnc),
                                UserWarning, 1)
                        children = list(c.subSumpteeIds())
                        for child in children:
                            for otherChild in children:
                                if otherChild is child:
                                    continue
                                # The comprehension variable must not be
                                # called ``c``: under Python 2 it leaks and
                                # would replace the class being examined.
                                disjointIds = [
                                    disjoint.identifier
                                    for disjoint in Class(child).disjointWith]
                                if otherChild not in disjointIds:
                                    # Single format string: ``%`` binds
                                    # tighter than ``+``, so the split form
                                    # raised TypeError.
                                    warnings.warn(
                                        "Primitive children (of %s) "
                                        "must be mutually disjoint: %s and %s" % (
                                            c.qname,
                                            Class(child).qname,
                                            Class(otherChild).qname),
                                        UserWarning, 1)
                if not isinstance(c.identifier, BNode):
                    print(c.__repr__(True))

    # The network is built and evaluated for every output mode.
    for rule in ruleSet:
        network.buildNetworkFromClause(rule)

    start = time.time()
    network.feedFactsToAdd(workingMemory)
    sTime = time.time() - start
    if sTime > 1:
        sTimeStr = "%s seconds" % sTime
    else:
        sTime = sTime * 1000
        sTimeStr = "%s milli seconds" % sTime
    _write_stderr("Time to calculate closure on working memory: ", sTimeStr)
    _write_stderr(network)

    if options.output == 'conflict':
        network.reportConflictSet()
    elif options.output not in ['rif', 'rif-xml', 'man-owl']:
        if options.closure:
            cGraph = network.closureGraph(factGraph)
            cGraph.namespace_manager = namespace_manager
            print(cGraph.serialize(destination=None,
                                   format=options.output,
                                   base=None))
        else:
            print(network.inferredFacts.serialize(destination=None,
                                                  format=options.output,
                                                  base=None))
def main():
    """
    Command-line entry point for the reasoner.

    Parses options and positional fact files (or, with
    ``--sparqlEndpoint``, a single endpoint URI) from ``sys.argv``, loads
    rules and facts, optionally extracts rules from an ontology (``--dlp``)
    and then either delegates to ``why(...)`` (when ``--why`` is given) or
    forward-chains naively over the facts. Results are printed to STDOUT
    in the format selected by ``--output``.
    """
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')

    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-naive methods' +
             'see --method')
    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
             ' along with the original triples. Otherwise ' +
             '(the default behavior), serialize only the inferred triples')
    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')
    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=['xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif',
                 'rif-xml', 'conflict', 'man-owl'],
        help="Serialize the inferred triples and/or original RDF triples to STDOUT " +
             "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', " +
             "or 'n3') or to print a summary of the conflict set (from the RETE " +
             "network) if the value of this option is 'conflict'. If the the " +
             " value is 'rif' or 'rif-xml', Then the rules used for inference " +
             "will be serialized as RIF. If the value is 'pml' and --why is used, " +
             " then the PML RDF statements are serialized. If output is " +
             "'proof-graph then a graphviz .dot file of the proof graph is printed. " +
             "Finally if the value is 'man-owl', then the RDF facts are assumed " +
             "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default")
    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
             'classes within the entire OWL/RDF are targetted for serialization' +
             '. Can be used more than once')
    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with with --method=bfp to determine whether or not to ' +
             'peek into the fact graph to identify predicates that are both ' +
             'derived and base. This is expensive for large fact graphs' +
             'and is explicitely not used against SPARQL endpoints')
    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
             'properties are serialized / extracted. Can be used more than once')
    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]" +
             "The default is %default")
    op.add_option(
        '--ddlGraph',
        default=False,
        help="The location of a N3 Data Description document describing the IDB predicates")
    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the RDF document(s) which serve as the initial facts " +
             " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', " +
             "or 'rdfa'. The default is %default")
    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety. A value of 'loose' " +
             " means that unsafe rules will be ignored. A value of 'strict' " +
             " will cause a syntax exception upon any unsafe rule. A value of " +
             "'none' (the default) does nothing")
    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help='Used with --dlp to add pD semantics ruleset for semantics not covered ' +
             'by DLP but can be expressed in definite Datalog Logic Programming' +
             ' The default is %default')
    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')
    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI). This ' +
             'can be used more than once')
    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network' +
             '. Can be specified more than once')
    # NOTE(review): store_true with default=True means the flag cannot be
    # switched off from the command line; left unchanged on purpose.
    op.add_option(
        '-d', '--debug',
        action='store_true',
        default=True,
        help='Include debugging output')
    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'],
        help='Used with --why to specify whether to: *not* check if predicates are ' +
             ' both derived and base (loose), if they are, mark as derived (defaultDerived) ' +
             'or as base (defaultBase) predicates, else raise an exception (harsh)')
    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for query. ' +
             'One of: gms, sld, bfp, naive')
    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help='Used with --why to determine whether to fetch all answers or just ' +
             'the first')
    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultDerived to specify which clashing ' +
             'predicate will be designated as a base predicate')
    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultBase to specify which clashing ' +
             'predicate will be designated as a derived predicate')
    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help='Used with --why to explicitely specify a hybrid predicate (in both ' +
             ' IDB and EDB) ')
    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate shouldnt have its ' +
             'magic sets calculated')
    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as a filter (entailments do not particpate in network)')
    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from " +
             "the rule graph. The default is %default")
    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a " +
             "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations")
    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF. The default is %default')
    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help='Indicates that the sole argument is the URI of a SPARQL endpoint to query')
    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
             '(other wise, fact graph(s) are used) ')
    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the OWL RDF/XML graph specified via --ontology. The default is %default")
    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help='The path to an N3 document associating SPARQL FILTER templates to ' +
             'rule builtins')
    op.add_option(
        '--negation',
        action='store_true',
        default=False,
        help='Extract negative rules?')
    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')

    (options, facts) = op.parse_args()

    # Namespace bindings: built-in default, then --ns, then rule documents.
    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        pref, nsUri = nsBind.split('=')
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from ", fileN)
        if options.builtins:
            import imp
            userFuncs = imp.load_source('builtins', options.builtins)
            rs = HornFromN3(
                fileN, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)

    ruleSet.nsMapping = nsBinds

    for prefix, uri in list(nsBinds.items()):
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(
                        predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. ", owlImport)
        if facts:
            # Only locally parsed fact files contribute their prefixes.
            for pref, uri in factGraph.namespaces():
                nsBinds[pref] = uri

    if options.stdin:
        assert not options.sparqlEndpoint, (
            "Cannot use --stdin with --sparqlEndpoint")
        factGraph.parse(sys.stdin, format=options.inputFormat)

    # Normalize namespace mappings
    # prune redundant, rdflib-allocated namespace prefix mappings
    new_ns_mgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in CollapseDictionary(dict(factGraph.namespaces())).items():
        new_ns_mgr.bind(k, v)
    factGraph.namespace_manager = new_ns_mgr

    # Imported before its first use. The function-scope import below the
    # --normalForm check used to make this a local name that was still
    # unbound here, so --normalForm always raised UnboundLocalError.
    from FuXi.DLP.DLNormalization import NormalFormReduction
    if options.normalForm:
        NormalFormReduction(factGraph)

    # With --sparqlEndpoint there are no locally loaded facts; start from
    # an empty working memory so later reads of the name cannot fail.
    workingMemory = set()
    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)

    if options.builtins:
        import imp
        userFuncs = imp.load_source('builtins', options.builtins)
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True,
            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)

    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)
    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for qname in options.classes:
                pref, uri = qname.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for qname in options.properties:
                pref, uri = qname.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [sc for sc in c.subClassOf
                                   if sc.isPrimitive()]
                        if len(primAnc) > 1:
                            warnings.warn(
                                "Branches of primitive skeleton taxonomy" +
                                " should form trees: %s has %s primitive parents: %s" % (
                                    c.qname, len(primAnc), primAnc),
                                UserWarning, 1)
                        children = list(c.subSumpteeIds())
                        for child in children:
                            for otherChild in children:
                                if otherChild is child:
                                    continue
                                # Distinct name so the class under
                                # inspection (``c``) is never shadowed.
                                disjointIds = [
                                    disjoint.identifier
                                    for disjoint in Class(child).disjointWith]
                                if otherChild not in disjointIds:
                                    warnings.warn(
                                        "Primitive children (of %s) " % (c.qname) +
                                        "must be mutually disjoint: %s and %s" % (
                                            Class(child).qname,
                                            Class(otherChild).qname),
                                        UserWarning, 1)
                # if not isinstance(c.identifier, BNode):
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    if options.why:
        why(options, factGraph, network, nsBinds, ruleSet, workingMemory)
    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds" % sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milli seconds" % sTime
        print("Time to calculate closure on working memory: ", sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    # NOTE(review): --method only accepts 'gms', 'bfp' or 'naive', so this
    # condition looks unreachable as written; confirm the intended values.
    if options.negation and network.negRules and options.method in [
            'both', 'bottomUp']:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print(
            "Time to calculate stratified, stable model"
            " (inferred %s facts): %s" % (rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(cGraph.serialize(destination=None,
                               format=options.output,
                               base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(network.inferredFacts.serialize(destination=None,
                                              format=options.output,
                                              base=None))
def main(): from optparse import OptionParser op = OptionParser( 'usage: %prog [options] factFile1 factFile2 ... factFileN') op.add_option('--why', default=None, help='Specifies the goals to solve for') op.add_option( '--closure', action='store_true', default=False, help='Whether or not to serialize the inferred triples' + ' along with the original triples. Otherwise ' + '(the default behavior), serialize only the inferred triples') op.add_option( '--naive', action='store_true', default=False, help='Naively perform forward chaining over rules and facts using the ' + 'RETE network') op.add_option( '--imports', action='store_true', default=False, help='Whether or not to follow owl:imports in the fact graph') op.add_option( '--output', default='n3', metavar='RDF_FORMAT', choices=[ 'xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml', 'conflict', 'man-owl' ], help= "Serialize the inferred triples and/or original RDF triples to STDOUT " + "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', " + "or 'n3') or to print a summary of the conflict set (from the RETE " + "network) if the value of this option is 'conflict'. If the the " + " value is 'rif' or 'rif-xml', Then the rules used for inference " + "will be serialized as RIF. If the value is 'pml' and --why is used, " + " then the PML RDF statements are serialized. If output is " + "'proof-graph then a graphviz .dot file of the proof graph is printed. " + "Finally if the value is 'man-owl', then the RDF facts are assumed " + "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default" ) op.add_option( '--class', dest='classes', action='append', default=[], metavar='QNAME', help='Used with --output=man-owl to determine which ' + 'classes within the entire OWL/RDF are targetted for serialization' + '. 
Can be used more than once') op.add_option( '--hybrid', action='store_true', default=False, help='Used to determine whether or not to ' + 'peek into the fact graph to identify predicates that are both ' + 'derived and base. This is expensive for large fact graphs' + 'and is explicitely not used against SPARQL endpoints') op.add_option( '--property', action='append', dest='properties', default=[], metavar='QNAME', help='Used with --output=man-owl or --extract to determine which ' + 'properties are serialized / extracted. Can be used more than once') op.add_option( '--normalize', action='store_true', default=False, help= "Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]" + "The default is %default") op.add_option( '--ddlGraph', default=False, help= "The location of a N3 Data Description document describing the IDB predicates" ) op.add_option( '--input-format', default='xml', dest='inputFormat', metavar='RDF_FORMAT', choices=['xml', 'trix', 'n3', 'nt', 'rdfa'], help= "The format of the RDF document(s) which serve as the initial facts " + " for the RETE network. One of 'xml','n3','trix', 'nt', " + "or 'rdfa'. The default is %default") op.add_option( '--safety', default='none', metavar='RULE_SAFETY', choices=['loose', 'strict', 'none'], help="Determines how to handle RIF Core safety. A value of 'loose' " + " means that unsafe rules will be ignored. A value of 'strict' " + " will cause a syntax exception upon any unsafe rule. A value of " + "'none' (the default) does nothing") op.add_option( '--pDSemantics', action='store_true', default=False, help= 'Used with --dlp to add pD semantics ruleset for semantics not covered ' + 'by DLP but can be expressed in definite Datalog Logic Programming' + ' The default is %default') op.add_option( '--stdin', action='store_true', default=False, help= 'Parse STDIN as an RDF graph to contribute to the initial facts. 
The default is %default ' ) op.add_option( '--ns', action='append', default=[], metavar="PREFIX=URI", help='Register a namespace binding (QName prefix to a base URI). This ' + 'can be used more than once') op.add_option( '--rules', default=[], action='append', metavar='PATH_OR_URI', help='The Notation 3 documents to use as rulesets for the RETE network' + '. Can be specified more than once') op.add_option('-d', '--debug', action='store_true', default=False, help='Include debugging output') op.add_option( '--strictness', default='defaultBase', metavar='DDL_STRICTNESS', choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'], help= 'Used with --why to specify whether to: *not* check if predicates are ' + ' both derived and base (loose), if they are, mark as derived (defaultDerived) ' + 'or as base (defaultBase) predicates, else raise an exception (harsh)') op.add_option( '--firstAnswer', default=False, action='store_true', help= 'Used with --why to determine whether to fetch all answers or just ' + 'the first') op.add_option( '--edb', default=[], action='append', metavar='EXTENSIONAL_DB_PREDICATE_QNAME', help= 'Used with --why/--strictness=defaultDerived to specify which clashing ' + 'predicate will be designated as a base predicate') op.add_option( '--idb', default=[], action='append', metavar='INTENSIONAL_DB_PREDICATE_QNAME', help= 'Used with --why/--strictness=defaultBase to specify which clashing ' + 'predicate will be designated as a derived predicate') op.add_option( '--hybridPredicate', default=[], action='append', metavar='PREDICATE_QNAME', help= 'Used with --why to explicitely specify a hybrid predicate (in both ' + ' IDB and EDB) ') op.add_option( '--noMagic', default=[], action='append', metavar='DB_PREDICATE_QNAME', help='Used with --why to specify that the predicate shouldnt have its ' + 'magic sets calculated') op.add_option( '--filter', action='append', default=[], metavar='PATH_OR_URI', help= 'The Notation 3 documents to use as a filter 
(entailments do not particpate in network)' ) op.add_option( '--ruleFacts', action='store_true', default=False, help="Determines whether or not to attempt to parse initial facts from " + "the rule graph. The default is %default") op.add_option( '--builtins', default=False, metavar='PATH_TO_PYTHON_MODULE', help="The path to a python module with function definitions (and a " + "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations" ) op.add_option( '--dlp', action='store_true', default=False, help= 'Use Description Logic Programming (DLP) to extract rules from OWL/RDF. The default is %default' ) op.add_option( '--sparqlEndpoint', action='store_true', default=False, help= 'Indicates that the sole argument is the URI of a SPARQL endpoint to query' ) op.add_option( '--ontology', action='append', default=[], metavar='PATH_OR_URI', help= 'The path to an OWL RDF/XML graph to use DLP to extract rules from ' + '(other wise, fact graph(s) are used) ') op.add_option( '--ruleFormat', default='n3', dest='ruleFormat', metavar='RULE_FORMAT', choices=['n3', 'rif'], help= "The format of the rules to parse ('n3', 'rif'). The default is %default" ) op.add_option( '--ontologyFormat', default='xml', dest='ontologyFormat', metavar='RDF_FORMAT', choices=['xml', 'trix', 'n3', 'nt', 'rdfa'], help= "The format of the OWL RDF/XML graph specified via --ontology. 
The default is %default" ) op.add_option( '--builtinTemplates', default=None, metavar='N3_DOC_PATH_OR_URI', help= 'The path to an N3 document associating SPARQL FILTER templates to ' + 'rule builtins') op.add_option( '--normalForm', action='store_true', default=False, help='Whether or not to reduce DL axioms & LP rules to a normal form') (options, facts) = op.parse_args() nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'} for nsBind in options.ns: pref, nsUri = nsBind.split('=') nsBinds[pref] = nsUri namespace_manager = NamespaceManager(Graph()) if options.sparqlEndpoint: factGraph = Graph(plugin.get('SPARQL', Store)(facts[0])) options.hybrid = False else: factGraph = Graph() ruleSet = Ruleset() for fileN in options.rules: if options.ruleFacts and not options.sparqlEndpoint: factGraph.parse(fileN, format='n3') print >> sys.stderr, "Parsing RDF facts from ", fileN if options.builtins: import imp userFuncs = imp.load_source('builtins', options.builtins) rs = HornFromN3(fileN, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS) nsBinds.update(rs.nsMapping) elif options.ruleFormat == 'rif': try: from FuXi.Horn.RIFCore import RIFCoreParser rif_parser = RIFCoreParser(location=fileN, debug=options.debug, nsBindings=nsBinds) rs, facts = rif_parser.getRuleset() except ImportError, e: raise Exception( "Missing 3rd party libraries for RIF processing: %s" % e) else: rs = HornFromN3(fileN) nsBinds.update(rs.nsMapping) ruleSet.formulae.extend(rs)