Exemple #1
0
 def parseN3Logic(self,src):
     """
     Parse an N3 document of rules and compile every rule into this network.

     src is handed to Graph.parse with format='n3', backed by an N3RuleStore
     that is given this network's rule-store filters as additional builtins.
     Each resulting Horn rule is wired in via buildNetwork and recorded in
     self.rules; afterwards the alphaNodes / alphaBuiltInNodes caches are
     recomputed from self.nodes.

     An AssertionError is raised when the document yields no rules.
     """
     n3Store = N3RuleStore(additionalBuiltins=self.ruleStore.filters)
     n3Graph = Graph(n3Store)
     n3Graph.parse(src, format='n3')
     n3Store._finalize()
     assert len(n3Store.rules),"There are no rules passed in!"
     from FuXi.Horn.HornRules import Ruleset
     hornRules = Ruleset(n3Rules=n3Store.rules,
                         nsMapping=self.nsMap)
     for hornRule in hornRules:
         bodyIter = iter(hornRule.formula.body)
         headIter = iter(hornRule.formula.head)
         self.buildNetwork(bodyIter, headIter, hornRule)
         self.rules.add(hornRule)

     # Refresh the per-kind node caches now that all rules are compiled
     alphas = []
     for candidate in self.nodes.values():
         if isinstance(candidate, AlphaNode):
             alphas.append(candidate)
     self.alphaNodes = alphas

     builtinAlphas = []
     for candidate in self.nodes.values():
         if isinstance(candidate, BuiltInAlphaNode):
             builtinAlphas.append(candidate)
     self.alphaBuiltInNodes = builtinAlphas
Exemple #2
0
def NormalizeLPDb(ruleGraph,fact_db):
    """
    For performance reasons, it is good to decompose the database into a set of
    pure base predicates (which can then be stored using a standard DBMS)
    and a set of pure derived predicates.  Fortunately, such a decomposition is
    always possible, because every database can be rewritten ... as a
    database containing only base and derived predicates.

    For every predicate reported by iter_non_base_non_derived_preds, the
    matching facts in fact_db are renamed to a '<pred>_ext' predicate and a
    rule deriving the original predicate from the '_ext' one is appended to
    the returned ruleset.

    ruleGraph -- graph whose store holds the parsed N3 rules (store.rules)
                 and the namespace manager (store.nsMgr)
    fact_db   -- fact graph; modified in place
    Returns the Ruleset built from ruleGraph, extended with the new rules.

    >>> ruleStore,ruleGraph=SetupRuleStore()
    >>> g=ruleGraph.parse(StringIO(PARTITION_LP_DB_PREDICATES),format='n3')
    >>> ruleStore._finalize()
    >>> len(ruleStore.rules)
    1
    >>> factGraph=Graph().parse(StringIO(PARTITION_LP_DB_PREDICATES),format='n3')
    >>> rs=Ruleset(n3Rules=ruleStore.rules,nsMapping=ruleStore.nsMgr)
    >>> for i in rs: print i
    Forall ?Y ?X ?Z ( ex:grandfather(?X ?Y) :- And( ex:father(?X ?Z) ex:parent(?X ?Y) ) )
    >>> len(factGraph)
    4
    >>> print [p for p,iter in iter_non_base_non_derived_preds(rs,factGraph)]
    [rdflib.URIRef('http://doi.acm.org/10.1145/16856.16859#grandfather')]
    """
    # The rule store is reached through the graph argument; the original code
    # referred to a `ruleStore` name that is not bound inside this function.
    ruleStore=ruleGraph.store
    rs=Ruleset(n3Rules=ruleStore.rules,
               nsMapping=ruleStore.nsMgr)
    toAdd=[]
    for pred,replFacts in iter_non_base_non_derived_preds(rs,fact_db):
        replPred=URIRef(pred+'_ext')
        # Materialize first: replFacts may be a lazy view over fact_db, which
        # is mutated inside the loop.
        for s,p,o in list(replFacts):
            fact_db.remove((s,p,o))
            toAdd.append((s,replPred,o))
        # NOTE(review): the doctest above shows `pred` as a URIRef, which has
        # no `arg` attribute -- confirm what iter_non_base_non_derived_preds
        # actually yields before relying on this rule construction.
        head=Uniterm(pred,pred.arg)
        body=Uniterm(replPred,pred.arg)
        newRule=Rule(Clause(body,head),
                     [term for term in pred.arg if isinstance(term,Variable)])
        rs.append(newRule)
    # Re-insert the removed facts under their '_ext' predicate; without this
    # the facts collected in toAdd would simply be dropped from the database.
    for renamedFact in toAdd:
        fact_db.add(renamedFact)
    return rs
Exemple #3
0
def main():
    """
    Command-line entry point.

    Parses the command-line options, loads the rule documents (--rules),
    the fact graphs (positional arguments and/or --stdin, or a SPARQL
    endpoint with --sparqlEndpoint) and optionally rules extracted from
    OWL/RDF via --dlp, then evaluates them:

    * without --why, the rules are compiled into the RETE network and, for
      --method=naive, the facts are fed through it;
    * with --why, the query is solved via --method=gms (magic sets,
      bottom-up) or --method=bfp (top-down SPARQL entailing store).

    Results are printed to STDOUT according to --output.
    """
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')

    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-naive methods' +
        'see --method')

    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
        ' along with the original triples.  Otherwise ' +
        '(the default behavior), serialize only the inferred triples')

    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')

    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=[
            'xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml',
            'conflict', 'man-owl'
        ],
        help=
        "Serialize the inferred triples and/or original RDF triples to STDOUT "
        +
        "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', "
        +
        "or 'n3') or to print a summary of the conflict set (from the RETE " +
        "network) if the value of this option is 'conflict'.  If the the " +
        " value is 'rif' or 'rif-xml', Then the rules used for inference " +
        "will be serialized as RIF.  If the value is 'pml' and --why is used, "
        + " then the PML RDF statements are serialized.  If output is " +
        "'proof-graph then a graphviz .dot file of the proof graph is printed. "
        +
        "Finally if the value is 'man-owl', then the RDF facts are assumed " +
        "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default"
    )

    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
        'classes within the entire OWL/RDF are targetted for serialization' +
        '.  Can be used more than once')

    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with with --method=bfp to determine whether or not to ' +
        'peek into the fact graph to identify predicates that are both ' +
        'derived and base.  This is expensive for large fact graphs' +
        'and is explicitely not used against SPARQL endpoints')

    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
        'properties are serialized / extracted.  Can be used more than once')

    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help=
        "Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"
        + "The default is %default")

    op.add_option(
        '--ddlGraph',
        default=False,
        help=
        "The location of a N3 Data Description document describing the IDB predicates"
    )

    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the RDF document(s) which serve as the initial facts " +
        " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', " +
        "or 'rdfa'.  The default is %default")

    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety.  A value of 'loose' " +
        " means that unsafe rules will be ignored.  A value of 'strict' " +
        " will cause a syntax exception upon any unsafe rule.  A value of " +
        "'none' (the default) does nothing")

    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help=
        'Used with --dlp to add pD semantics ruleset for semantics not covered '
        + 'by DLP but can be expressed in definite Datalog Logic Programming' +
        ' The default is %default')

    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help=
        'Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default '
    )

    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This '
        + 'can be used more than once')

    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network'
        + '.  Can be specified more than once')

    # NOTE(review): store_true combined with default=True makes this flag a
    # no-op (debug output is always on) -- confirm whether that is intended.
    op.add_option('-d',
                  '--debug',
                  action='store_true',
                  default=True,
                  help='Include debugging output')

    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'],
        help=
        'Used with --why to specify whether to: *not* check if predicates are '
        +
        ' both derived and base (loose), if they are, mark as derived (defaultDerived) '
        +
        'or as base (defaultBase) predicates, else raise an exception (harsh)')

    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for query.  '
        + 'One of: gms, sld, bfp, naive')

    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help=
        'Used with --why to determine whether to fetch all answers or just ' +
        'the first')

    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultDerived to specify which clashing '
        + 'predicate will be designated as a base predicate')

    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultBase to specify which clashing ' +
        'predicate will be designated as a derived predicate')

    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help=
        'Used with --why to explicitely specify a hybrid predicate (in both ' +
        ' IDB and EDB) ')

    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate shouldnt have its '
        + 'magic sets calculated')

    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The Notation 3 documents to use as a filter (entailments do not particpate in network)'
    )

    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from "
        + "the rule graph.  The default is %default")

    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a " +
        "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations"
    )

    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help=
        'Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default'
    )

    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help=
        'Indicates that the sole argument is the URI of a SPARQL endpoint to query'
    )

    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
        '(other wise, fact graph(s) are used)  ')

    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the OWL RDF/XML graph specified via --ontology.  The default is %default"
    )

    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help=
        'The path to an N3 document associating SPARQL FILTER templates to ' +
        'rule builtins')

    op.add_option('--negation',
                  action='store_true',
                  default=False,
                  help='Extract negative rules?')

    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        # Split on the first '=' only: the URI part may itself contain '='
        pref, nsUri = nsBind.split('=', 1)
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from ", fileN)
        if options.builtins:
            import imp
            userFuncs = imp.load_source('builtins', options.builtins)
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)
        #ruleGraph.parse(fileN, format='n3')

    ruleSet.nsMapping = nsBinds

    for prefix, uri in list(nsBinds.items()):
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. ", owlImport)

    if not options.sparqlEndpoint and facts:
        for pref, uri in factGraph.namespaces():
            nsBinds[pref] = uri

    if options.stdin:
        assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint"
        factGraph.parse(sys.stdin, format=options.inputFormat)

    #Normalize namespace mappings
    #prune redundant, rdflib-allocated namespace prefix mappings
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in list(
            CollapseDictionary(
                dict([(k, v) for k, v in factGraph.namespaces()])).items()):
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    # The import must run before the first use below.  A function-level
    # import makes the name local to main(), so importing it only inside the
    # later --dlp branch left --normalForm hitting an unbound local.
    if options.normalForm or options.dlp:
        from FuXi.DLP.DLNormalization import NormalFormReduction

    if options.normalForm:
        NormalFormReduction(factGraph)

    # NOTE(review): workingMemory is only bound when facts are local; the
    # naive branch and the GMS fallback below read it unconditionally, so
    # they fail with --sparqlEndpoint.  Confirm the intended behavior.
    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if options.builtins:
        import imp
        userFuncs = imp.load_source('builtins', options.builtins)
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)
    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                pref, uri = c.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                pref, uri = p.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [
                            sc for sc in c.subClassOf if sc.isPrimitive()
                        ]
                        if len(primAnc) > 1:
                            warnings.warn(
                                "Branches of primitive skeleton taxonomy" +
                                " should form trees: %s has %s primitive parents: %s"
                                % (c.qname, len(primAnc), primAnc),
                                UserWarning, 1)
                        children = [desc for desc in c.subSumpteeIds()]
                        for child in children:
                            for otherChild in [
                                    o for o in children if o is not child
                            ]:
                                # The comprehension variable is deliberately
                                # not named `c`, so it can never shadow the
                                # class being reported in the warning below.
                                disjointIds = [
                                    dc.identifier
                                    for dc in Class(child).disjointWith
                                ]
                                if otherChild not in disjointIds:
                                    warnings.warn(
                                        "Primitive children (of %s) " % (c.qname) +
                                        "must be mutually disjoint: %s and %s" % (
                                            Class(child).qname,
                                            Class(otherChild).qname),
                                        UserWarning, 1)
                # if not isinstance(c.identifier, BNode):
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds = []
    if options.why:
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                 format='n3')
        factGraph.templateMap = \
            dict([(pred, template)
                      for pred, _ignore, template in
                            builtinTemplateGraph.triples(
                                (None,
                                 TEMPLATES.filterTemplate,
                                 None))])
        goals = []
        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prologue:
            query.prologue = Prologue(None, [])
            query.prologue.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in list(nsBinds.items()):
                if prefix not in query.prologue.prefixBindings:
                    query.prologue.prefixBindings[prefix] = nsInst
        print("query.prologue", query.prologue)
        print("query.query", query.query)
        print("query.query.whereClause", query.query.whereClause)
        print("query.query.whereClause.parsedGraphPattern",
              query.query.whereClause.parsedGraphPattern)
        goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern(
            query.query.whereClause.parsedGraphPattern,
            query.prologue).patterns])
        # dPreds=[]# p for s, p, o in goals ]
        # print("goals", goals)
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        # topDownDerivedPreds  = []
        defaultBasePreds = []
        defaultDerivedPreds = set()
        hybridPredicates = []
        mapping = dict(newNsMgr.namespaces())
        for edb in options.edb:
            pref, uri = edb.split(':')
            defaultBasePreds.append(URIRef(mapping[pref] + uri))
        noMagic = []
        for pred in options.noMagic:
            pref, uri = pred.split(':')
            noMagic.append(URIRef(mapping[pref] + uri))
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds = IdentifyDerivedPredicates(
                ddlGraph, Graph(), ruleSet)
        else:
            for idb in options.idb:
                pref, uri = idb.split(':')
                defaultDerivedPreds.add(URIRef(mapping[pref] + uri))
            # For rdf:type goals the class (object) is the derived predicate;
            # a conditional expression avoids the and/or idiom falling through
            # to `p` when `o` happens to be falsy.
            defaultDerivedPreds.update(
                set([o if p == RDF.type else p for s, p, o in goals]))
            for hybrid in options.hybridPredicate:
                pref, uri = hybrid.split(':')
                hybridPredicates.append(URIRef(mapping[pref] + uri))

        if options.method == 'gms':
            for goal in goals:
                goalSeed = AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation)",
                      goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t", factGraph.qname(p))
            for rule in MagicSetTransformation(
                    factGraph,
                    ruleSet,
                    goals,
                    derivedPreds=bottomUpDerivedPreds,
                    strictCheck=nameMap[options.strictness],
                    defaultPredicates=(defaultBasePreds, defaultDerivedPreds),
                    noMagic=noMagic):
                magicRuleNo += 1
                network.buildNetworkFromClause(rule)
            if len(list(ruleSet)):
                print("reduction in size of program: %s (%s -> %s clauses)" %
                      (100 -
                       (float(magicRuleNo) / float(len(list(ruleSet)))) * 100,
                       len(list(ruleSet)), magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not [
                    rule for rule in factGraph.adornedProgram if len(rule.sip)
            ]:
                warnings.warn(
                    "Using GMS sideways information strategy with no " +
                    "information to pass from query.  Falling back to " +
                    "naive method over given facts and rules")
                network.feedFactsToAdd(workingMemory)
            sTime = time.time() - start
            if sTime > 1:
                sTimeStr = "%s seconds" % sTime
            else:
                sTime = sTime * 1000
                sTimeStr = "%s milli seconds" % sTime
            print("Time to calculate closure on working memory: ", sTimeStr)

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            if options.builtinTemplates:
                builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                     format='n3')
                builtinDict = dict([
                    (pred, template) for pred, _ignore, template in
                    builtinTemplateGraph.triples((None,
                                                  TEMPLATES.filterTemplate,
                                                  None))
                ])
            else:
                builtinDict = None
            topDownStore = TopDownSPARQLEntailingStore(
                factGraph.store,
                factGraph,
                idb=ruleSet,
                DEBUG=options.debug,
                derivedPredicates=topDownDPreds,
                templateMap=builtinDict,
                nsBindings=network.nsMap,
                identifyHybridPredicates=options.hybrid
                if options.method == 'bfp' else False,
                hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal) for goal in goals],
            #                         targetGraph)
            # query = queryLiteral.asSPARQL()
            # print("Goal to solve ", query)
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                sTime = time.time() - start
                if sTime > 1:
                    sTimeStr = "%s seconds" % sTime
                else:
                    sTime = sTime * 1000
                    sTimeStr = "%s milli seconds" % sTime
                print("Time to reach answer ground goal answer of %s: %s" %
                      (result.askAnswer[0], sTimeStr))
            else:
                for rt in result:
                    sTime = time.time() - start
                    if sTime > 1:
                        sTimeStr = "%s seconds" % sTime
                    else:
                        sTime = sTime * 1000
                        sTimeStr = "%s milli seconds" % sTime
                    # NOTE(review): with --firstAnswer the loop stops before
                    # the first answer is printed -- confirm that is intended.
                    if options.firstAnswer:
                        break
                    print(
                        "Time to reach answer %s via top-down SPARQL sip strategy: %s"
                        % (rt, sTimeStr))
            if options.output == 'conflict' and options.method == 'bfp':
                for _network, _goal in topDownStore.queryNetworks:
                    # Report the per-query network being iterated, not the
                    # outer RETE network
                    print(_network, _goal)
                    _network.reportConflictSet(options.debug)
                for query in topDownStore.edbQueries:
                    print(query.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds" % sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milli seconds" % sTime
        print("Time to calculate closure on working memory: ", sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    # NOTE(review): --method only accepts 'gms', 'bfp' or 'naive', so this
    # condition can never hold as written -- confirm which methods should
    # trigger the stratified model computation.
    if options.negation and network.negRules and options.method in [
            'both', 'bottomUp'
    ]:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print(
            "Time to calculate stratified, stable model (inferred %s facts): %s"
            % (rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(
            cGraph.serialize(destination=None,
                             format=options.output,
                             base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(
            network.inferredFacts.serialize(destination=None,
                                            format=options.output,
                                            base=None))
Exemple #4
0
def _build_option_parser():
    """Create the command-line parser used by ``main``.

    Returns:
        An ``optparse.OptionParser`` carrying every option ``main`` reads.
        Options are registered in the order they appear in ``--help``.
    """
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')
    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-niave methods' +
             'see --method')
    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
             ' along with the original triples.  Otherwise ' +
             '(the default behavior), serialize only the inferred triples')
    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')
    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=['xml',
                 'TriX',
                 'n3',
                 'pml',
                 'proof-graph',
                 'nt',
                 'rif',
                 'rif-xml',
                 'conflict',
                 'man-owl'],
        help="Serialize the inferred triples and/or original RDF triples to STDOUT " +
             "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', " +
             "or 'n3') or to print a summary of the conflict set (from the RETE " +
             "network) if the value of this option is 'conflict'.  If the the " +
             " value is 'rif' or 'rif-xml', Then the rules used for inference " +
             "will be serialized as RIF.  If the value is 'pml' and --why is used, " +
             " then the PML RDF statements are serialized.  If output is " +
             "'proof-graph then a graphviz .dot file of the proof graph is printed. " +
             "Finally if the value is 'man-owl', then the RDF facts are assumed " +
             "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default")
    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
             'classes within the entire OWL/RDF are targetted for serialization' +
             '.  Can be used more than once')
    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with with --method=bfp to determine whether or not to ' +
             'peek into the fact graph to identify predicates that are both ' +
             'derived and base.  This is expensive for large fact graphs' +
             'and is explicitely not used against SPARQL endpoints')
    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
             'properties are serialized / extracted.  Can be used more than once')
    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]" +
             "The default is %default")
    op.add_option(
        '--ddlGraph',
        default=False,
        help="The location of a N3 Data Description document describing the IDB predicates")
    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the RDF document(s) which serve as the initial facts " +
             " for the RETE network. One of 'xml','n3','trix', 'nt', " +
             "or 'rdfa'.  The default is %default")
    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety.  A value of 'loose' " +
             " means that unsafe rules will be ignored.  A value of 'strict' " +
             " will cause a syntax exception upon any unsafe rule.  A value of " +
             "'none' (the default) does nothing")
    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help='Used with --dlp to add pD semantics ruleset for semantics not covered ' +
             'by DLP but can be expressed in definite Datalog Logic Programming' +
             ' The default is %default')
    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')
    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This ' +
             'can be used more than once')
    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network' +
             '.  Can be specified more than once')
    op.add_option(
        '-d', '--debug', action='store_true', default=False,
        help='Include debugging output')
    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose',
                 'defaultBase',
                 'defaultDerived',
                 'harsh'],
        help='Used with --why to specify whether to: *not* check if predicates are ' +
             ' both derived and base (loose), if they are, mark as derived (defaultDerived) ' +
             'or as base (defaultBase) predicates, else raise an exception (harsh)')
    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for query.  ' +
             'One of: gms,bfp,naive')
    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help='Used with --why to determine whether to fetch all answers or just ' +
             'the first')
    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultDerived to specify which clashing ' +
             'predicate will be designated as a base predicate')
    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultBase to specify which clashing ' +
             'predicate will be designated as a derived predicate')
    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help='Used with --why to explicitely specify a hybrid predicate (in both ' +
             ' IDB and EDB) ')
    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate shouldnt have its ' +
             'magic sets calculated')
    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as a filter (entailments do not particpate in network)')
    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from " +
             "the rule graph.  The default is %default")
    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a " +
             "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations")
    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default')
    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help='Indicates that the sole argument is the URI of a SPARQL endpoint to query')
    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
             '(other wise, fact graph(s) are used)  ')
    op.add_option(
        '--ruleFormat',
        default='n3',
        dest='ruleFormat',
        metavar='RULE_FORMAT',
        choices=['n3', 'rif'],
        help="The format of the rules to parse ('n3', 'rif').  The default is %default")
    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the OWL RDF/XML graph specified via --ontology.  The default is %default")
    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help='The path to an N3 document associating SPARQL FILTER templates to ' +
             'rule builtins')
    op.add_option(
        '--negation',
        action='store_true',
        default=False,
        help='Extract negative rules?')
    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    return op


def _format_elapsed(seconds):
    """Render a duration as ``main`` reports it.

    Durations above one second are shown in seconds; anything else is
    converted to milliseconds.
    """
    if seconds > 1:
        return "%s seconds" % seconds
    return "%s milli seconds" % (seconds * 1000)


def _load_user_builtins(path):
    """Execute the Python file at *path* and return its ADDITIONAL_FILTERS.

    Uses ``importlib`` rather than ``imp`` (removed in Python 3.12) and a
    private module name, so the interpreter's own ``builtins`` module is
    never the load target.  The module is not cached in ``sys.modules``.

    Raises:
        AttributeError: if the file does not define ``ADDITIONAL_FILTERS``.
    """
    import importlib.machinery
    import importlib.util
    modName = 'fuxi_user_builtins'
    loader = importlib.machinery.SourceFileLoader(modName, path)
    spec = importlib.util.spec_from_file_location(modName, path, loader=loader)
    module = importlib.util.module_from_spec(spec)
    loader.exec_module(module)
    return module.ADDITIONAL_FILTERS


def _qname_to_uri(mapping, qname):
    """Expand a ``prefix:local`` QName to a URIRef via *mapping*.

    Raises:
        ValueError: if *qname* does not contain exactly one ':'.
        KeyError: if the prefix is not a key of *mapping*.
    """
    pref, local = qname.split(':')
    return URIRef(mapping[pref] + local)


def main():
    """Command-line entry point: load rules and facts, reason, print results.

    Options come from ``sys.argv`` (see ``_build_option_parser``).  The
    positional arguments are fact documents, or the endpoint URI when
    ``--sparqlEndpoint`` is given.

    Without ``--why`` every rule is compiled into the network and, when
    ``--method`` is 'naive', the facts are fed through it.  With ``--why``
    the SPARQL goals are evaluated only for ``--method=gms`` or
    ``--method=bfp``.  All reporting and serialization goes to STDOUT.
    """
    options, facts = _build_option_parser().parse_args()

    # Imported unconditionally, but only after argument parsing so that
    # --help does not need it.  Both --normalForm and --dlp use this name;
    # importing it only inside the --dlp branch left it an unbound
    # function-local when --normalForm ran (UnboundLocalError).
    from FuXi.DLP.DLNormalization import NormalFormReduction

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        # Split on the first '=' only: the URI part may itself contain '='.
        pref, nsUri = nsBind.split('=', 1)
        nsBinds[pref] = nsUri

    # Load the user-supplied builtins once; they are needed both for
    # parsing the rule files and for setting up the rule store.
    userBuiltins = None
    if options.builtins:
        userBuiltins = _load_user_builtins(options.builtins)

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQL', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from %s" % fileN)
        if options.builtins:
            rs = HornFromN3(fileN, additionalBuiltins=userBuiltins)
        elif options.ruleFormat == 'rif':
            try:
                from FuXi.Horn.RIFCore import RIFCoreParser
                rif_parser = RIFCoreParser(location=fileN, debug=options.debug)
                rs = rif_parser.getRuleset()
            except ImportError:
                raise Exception(
                    "Missing 3rd party libraries for RIF processing"
                )
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)

    ruleSet.nsMapping = nsBinds

    for prefix, uri in nsBinds.items():
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. %s" % owlImport)

    if not options.sparqlEndpoint and facts:
        for pref, uri in factGraph.namespaces():
            nsBinds[pref] = uri

    if options.stdin:
        assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint"
        factGraph.parse(sys.stdin, format=options.inputFormat)

    # Normalize namespace mappings:
    # prune redundant, rdflib-allocated namespace prefix mappings.
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    # Snapshot the bindings into a dict before re-binding any of them.
    collapsed = CollapseDictionary(dict(factGraph.namespaces()))
    for k, v in collapsed.items():
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    if options.normalForm:
        NormalFormReduction(factGraph)

    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if options.builtins:
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True,
            additionalBuiltins=userBuiltins)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)

    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                print(Class(_qname_to_uri(mapping, c)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                print(Property(_qname_to_uri(mapping, p)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [sc for sc in c.subClassOf if sc.isPrimitive()]
                        if len(primAnc) > 1:
                            warnings.warn("Branches of primitive skeleton taxonomy" +
                              " should form trees: %s has %s primitive parents: %s" % (
                             c.qname, len(primAnc), primAnc), UserWarning, 1)
                        children = list(c.subSumpteeIds())
                        for child in children:
                            # Identifiers the child is declared disjoint with.
                            disjointIds = [dc.identifier
                                           for dc in Class(child).disjointWith]
                            for otherChild in [o for o in children if o is not child]:
                                if otherChild not in disjointIds:
                                    warnings.warn("Primitive children (of %s) " % (c.qname) +
                                          "must be mutually disjoint: %s and %s" % (
                                      Class(child).qname,
                                      Class(otherChild).qname), UserWarning, 1)
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds = []
    if options.why:
        # Parse the builtin-template document once; the resulting map is
        # used for the fact graph here and again by the bfp store below.
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                 format='n3')
        templateMap = dict(
            (pred, template)
            for pred, _ignore, template in builtinTemplateGraph.triples(
                (None, TEMPLATES.filterTemplate, None)))
        factGraph.templateMap = templateMap
        goals = []
        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prolog:
            query.prolog = Prolog(None, [])
            query.prolog.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in list(nsBinds.items()):
                if prefix not in query.prolog.prefixBindings:
                    query.prolog.prefixBindings[prefix] = nsInst
        goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern(
            query.query.whereClause.parsedGraphPattern,
            query.prolog).patterns])
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        defaultDerivedPreds = set()
        hybridPredicates = []
        mapping = dict(newNsMgr.namespaces())
        defaultBasePreds = [_qname_to_uri(mapping, edb) for edb in options.edb]
        noMagic = [_qname_to_uri(mapping, pred) for pred in options.noMagic]
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds = IdentifyDerivedPredicates(
                ddlGraph,
                Graph(),
                ruleSet)
        else:
            for idb in options.idb:
                defaultDerivedPreds.add(_qname_to_uri(mapping, idb))
            # For rdf:type goals the class (object) is the derived
            # predicate; otherwise the predicate itself is.
            defaultDerivedPreds.update(
                set([p == RDF.type and o or p for s, p, o in goals]))
            for hybrid in options.hybridPredicate:
                hybridPredicates.append(_qname_to_uri(mapping, hybrid))

        if options.method == 'gms':
            for goal in goals:
                goalSeed = AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation) %s" % goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t%s" % factGraph.qname(p))
            for rule in MagicSetTransformation(
                    factGraph,
                    ruleSet,
                    goals,
                    derivedPreds=bottomUpDerivedPreds,
                    strictCheck=nameMap[options.strictness],
                    defaultPredicates=(defaultBasePreds,
                                       defaultDerivedPreds),
                    noMagic=noMagic):
                magicRuleNo += 1
                network.buildNetworkFromClause(rule)
            ruleCount = len(list(ruleSet))
            if ruleCount:
                print("reduction in size of program: %s (%s -> %s clauses)" % (
                    100 - (float(magicRuleNo) / float(ruleCount)) * 100,
                    ruleCount,
                    magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not any(len(rule.sip) for rule in factGraph.adornedProgram):
                warnings.warn(
                    "Using GMS sideways information strategy with no " +
                    "information to pass from query.  Falling back to " +
                    "naive method over given facts and rules")
                network.feedFactsToAdd(workingMemory)
            sTimeStr = _format_elapsed(time.time() - start)
            print("Time to calculate closure on working memory: %s" % sTimeStr)

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            # The store gets its own copy of the template map, or None
            # when no template document was supplied.
            if options.builtinTemplates:
                builtinDict = dict(templateMap)
            else:
                builtinDict = None
            topDownStore = TopDownSPARQLEntailingStore(
                factGraph.store,
                factGraph,
                idb=ruleSet,
                DEBUG=options.debug,
                derivedPredicates=topDownDPreds,
                templateMap=builtinDict,
                nsBindings=network.nsMap,
                identifyHybridPredicates=options.hybrid,
                hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                sTimeStr = _format_elapsed(time.time() - start)
                print("Time to reach answer ground goal answer of %s: %s" % (
                      result.askAnswer[0], sTimeStr))
            else:
                for rt in result:
                    sTimeStr = _format_elapsed(time.time() - start)
                    # NOTE(review): with --firstAnswer the loop stops
                    # before anything is printed -- confirm that the first
                    # answer is really meant to go unreported.
                    if options.firstAnswer:
                        break
                    print(
                        "Time to reach answer %s via top-down SPARQL sip strategy: %s" % (
                            rt, sTimeStr))
            if options.output == 'conflict':
                for _network, _goal in topDownStore.queryNetworks:
                    print(_network, _goal)
                    _network.reportConflictSet(options.debug)
                for edbQuery in topDownStore.edbQueries:
                    print(edbQuery.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTimeStr = _format_elapsed(time.time() - start)
        print("Time to calculate closure on working memory: %s" % sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    # NOTE(review): --method only accepts 'gms', 'bfp' or 'naive', so this
    # membership test can never succeed with the parser above -- confirm
    # which methods should trigger the stratified model.
    if options.negation and network.negRules and options.method in ['both',
                                                                    'bottomUp']:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print("Time to calculate stratified, stable model (inferred %s facts): %s" % (
            rt,
            time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure \
            and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(cGraph.serialize(destination=None,
                               format=options.output,
                               base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(network.inferredFacts.serialize(destination=None,
                                              format=options.output,
                                              base=None))
Exemple #5
0
    def __init__(self, ruleStore, name=None,
                 initialWorkingMemory=None,
                 inferredTarget=None,
                 nsMap=None,
                 graphVizOutFile=None,
                 dontFinalize=False,
                 goal=None):
        """Set up an empty network and build it from the rules in *ruleStore*.

        Args:
            ruleStore: rule store; its ``rules`` are compiled into the
                network and, unless *dontFinalize* is set, its
                ``_finalize()`` is called first.
            name: identifier for the network; a fresh ``BNode`` is used
                when this is falsy.
            initialWorkingMemory: facts fed through the network once it is
                built; the working memory starts as an empty set when this
                is falsy.
            inferredTarget: graph that receives inferred facts; when None a
                new ``Graph`` is created with the *nsMap* prefixes bound.
            nsMap: prefix -> namespace mapping.  Defaults to a new empty
                dict per instance (a shared ``{}`` default would leak
                bindings added through ``self.nsMap`` into every other
                network built without an explicit mapping).
            graphVizOutFile: if truthy, the rendered network is written to
                this location.
            dontFinalize: skip ``ruleStore._finalize()``.
            goal: stored on the instance as ``self.goal``.
        """
        import warnings

        if nsMap is None:
            nsMap = {}
        self.leanCheck = {}
        self.goal = goal
        self.nsMap = nsMap
        self.name = name or BNode()
        self.nodes = {}
        self.alphaPatternHash = {}
        self.ruleSet = set()
        # One bucket per combination of three '1'/'0' flags.
        for alphaPattern in xcombine(('1', '0'), ('1', '0'), ('1', '0')):
            self.alphaPatternHash[tuple(alphaPattern)] = {}
        if inferredTarget is None:
            self.inferredFacts = Graph()
            namespace_manager = NamespaceManager(self.inferredFacts)
            for k, v in nsMap.items():
                namespace_manager.bind(k, v)
            self.inferredFacts.namespace_manager = namespace_manager
        else:
            self.inferredFacts = inferredTarget
        self.workingMemory = initialWorkingMemory or set()
        self.proofTracers = {}
        self.terminalNodes = set()
        self.instantiations = {}
        self.ruleStore = ruleStore
        self.justifications = {}
        self.dischargedBindings = {}
        if not dontFinalize:
            self.ruleStore._finalize()
        self.filteredFacts = Graph()

        # 'Universal truths' for a rule set are rules where the LHS is empty.
        # Rather than automatically adding them to the working set, alpha nodes are 'notified'
        # of them, so they can be checked for while performing inter element
        # tests.
        self.universalTruths = []
        from FuXi.Horn.HornRules import Ruleset
        self.rules = set()
        self.negRules = set()
        for rule in Ruleset(n3Rules=self.ruleStore.rules, nsMapping=self.nsMap):
            # Rules already present in the store are still compiled, but
            # each one triggers a deprecation warning.
            warnings.warn(
                "Rules in a network should be built *after* construction via " +
                " self.buildNetworkClause(HornFromN3(n3graph)) for instance",
                DeprecationWarning, 2)
            self.buildNetworkFromClause(rule)
        self.alphaNodes = [
            node for node in self.nodes.values()
            if isinstance(node, AlphaNode)]
        self.alphaBuiltInNodes = [
            node for node in self.nodes.values()
            if isinstance(node, BuiltInAlphaNode)]
        self._setupDefaultRules()
        if initialWorkingMemory:
            start = time.time()
            self.feedFactsToAdd(initialWorkingMemory)
            print("Time to calculate closure on working memory: %s m seconds" % (
                (time.time() - start) * 1000))
        if graphVizOutFile:
            print("Writing out RETE network to ", graphVizOutFile)
            renderNetwork(self, nsMap=nsMap).write(graphVizOutFile)
def main():
    """Command-line driver: load rules and facts, run the RETE network, print results.

    Options and positional fact files are read from ``sys.argv`` via
    ``optparse``.  The function then:

    1. collects namespace bindings from ``--ns`` and from each ``--rules``
       document, and accumulates the Horn rules of those documents;
    2. parses the fact files (and optionally STDIN / the rule documents
       themselves) into a fact graph and turns it into a token set;
    3. optionally extends the rule set with rules extracted via
       ``--dlp``;
    4. for ``--output=rif`` prints the rules, for ``--output=man-owl``
       prints classes / properties of the closure graph (with optional
       ``--normalize`` warnings);
    5. builds the network from the rules, feeds it the facts, reports the
       elapsed time and the network on STDERR, and finally prints either
       the conflict set or a serialization of the inferred (or, with
       ``--closure``, the full closure) graph on STDOUT.

    All output calls are written so that they emit the same bytes on
    Python 2 and Python 3.
    """
    from optparse import OptionParser
    op = OptionParser('usage: %prog [options] factFile1 factFile2 ... factFileN')
    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples'
             ' along with the original triples.  Otherwise '
             '(the default behavior), serialize only the inferred triples')
    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=['xml',
                 'TriX',
                 'n3',
                 'nt',
                 'rif',
                 'rif-xml',
                 'conflict',
                 'man-owl'],
        help="Serialize the inferred triples and/or original RDF triples to STDOUT "
             "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', "
             "or 'n3') or to print a summary of the conflict set (from the RETE "
             "network) if the value of this option is 'conflict'.  If the the "
             " value is 'rif' or 'rif-xml', Then the rules used for inference "
             "will be serialized as RIF.  Finally if the value is 'man-owl', then "
             "the RDF facts are assumed to be OWL/RDF and serialized via Manchester OWL "
             "syntax.  The default is %default")
    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which '
             'classes within the entire OWL/RDF are targetted for serialization'
             '.  Can be used more than once')
    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which '
             'properties are serialized / extracted.  Can be used more than once')
    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"
             "The default is %default")
    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the RDF document(s) which serve as the initial facts "
             " for the RETE network. One of 'xml','n3','trix', 'nt', "
             "or 'rdfa'.  The default is %default")
    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help='Used with --dlp to add pD semantics ruleset for semantics not covered '
             'by DLP but can be expressed in definite Datalog Logic Programming'
             ' The default is %default')
    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')
    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This '
             'can be used more than once')
    # The original passed ``default=[]`` twice here, which is a
    # "keyword argument repeated" SyntaxError; it is given once now.
    op.add_option(
        '--rules',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network'
             '.  Can be specified more than once')
    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from "
             "the rule graph.  The default is %default")
    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        # Split on the first '=' only so URIs containing '=' stay intact.
        pref, nsUri = nsBind.split('=', 1)
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts:
            factGraph.parse(fileN, format='n3')
            # Two spaces reproduce the output of the original
            # ``print >>sys.stderr, "... from ", fileN`` statement.
            sys.stderr.write("Parsing RDF facts from  %s\n" % (fileN,))
        rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)
    ruleSet.nsMapping = nsBinds

    for prefix, uri in nsBinds.items():
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    for fileN in facts:
        factGraph.parse(fileN, format=options.inputFormat)

    if options.stdin:
        factGraph.parse(sys.stdin, format=options.inputFormat)

    workingMemory = generateTokenSet(factGraph)

    rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        dlp = setupDescriptionLogicProgramming(
            factGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False)
        ruleSet.formulae.extend(dlp)

    if options.output == 'rif':
        for rule in ruleSet:
            print(rule)
    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for qname in options.classes:
                pref, uri = qname.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for qname in options.properties:
                pref, uri = qname.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier)
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize and c.isPrimitive():
                    primAnc = [sc for sc in c.subClassOf if sc.isPrimitive()]
                    if len(primAnc) > 1:
                        warnings.warn(
                            "Branches of primitive skeleton taxonomy"
                            " should form trees: %s has %s primitive parents: %s" % (
                                c.qname, len(primAnc), primAnc),
                            UserWarning, 1)
                    children = list(c.subSumpteeIds())
                    for child in children:
                        # A distinct comprehension variable is used so the
                        # outer ``c`` is not clobbered (list-comprehension
                        # variables leak into the enclosing scope on Python 2).
                        disjointIds = [dc.identifier
                                       for dc in Class(child).disjointWith]
                        for otherChild in [o for o in children if o is not child]:
                            if otherChild not in disjointIds:
                                # Format the whole message at once: applying
                                # ``%`` to only the second literal fed three
                                # arguments to two placeholders (TypeError).
                                warnings.warn(
                                    "Primitive children (of %s) "
                                    "must be mutually disjoint: %s and %s" % (
                                        c.qname,
                                        Class(child).qname,
                                        Class(otherChild).qname),
                                    UserWarning, 1)
                if not isinstance(c.identifier, BNode):
                    print(c.__repr__(True))

    for rule in ruleSet:
        network.buildNetworkFromClause(rule)
    start = time.time()
    network.feedFactsToAdd(workingMemory)
    sTime = time.time() - start
    if sTime > 1:
        sTimeStr = "%s seconds" % sTime
    else:
        sTime = sTime * 1000
        sTimeStr = "%s milli seconds" % sTime
    sys.stderr.write(
        "Time to calculate closure on working memory:  %s\n" % (sTimeStr,))
    sys.stderr.write("%s\n" % (network,))

    if options.output == 'conflict':
        network.reportConflictSet()
    elif options.output not in ['rif', 'rif-xml', 'man-owl']:
        if options.closure:
            cGraph = network.closureGraph(factGraph)
            cGraph.namespace_manager = namespace_manager
            print(cGraph.serialize(destination=None,
                                   format=options.output,
                                   base=None))
        else:
            print(network.inferredFacts.serialize(destination=None,
                                                  format=options.output,
                                                  base=None))
Exemple #7
0
def main():
    """Command-line driver: load rules/facts, reason over them, print results.

    Options and positional arguments are read from ``sys.argv`` via
    ``optparse``.  The function then:

    1. collects namespace bindings from ``--ns``, the ``--rules`` documents
       and the parsed fact graph, and accumulates the Horn rules;
    2. builds the fact graph, either from the positional fact files / STDIN
       or, with ``--sparqlEndpoint``, as a graph over the endpoint named by
       the first positional argument;
    3. optionally normalizes the fact graph (``--normalForm``) and extends
       the rule set with rules extracted via ``--dlp``;
    4. prints rules (``--output=rif``) or Manchester-OWL style descriptions
       (``--output=man-owl``);
    5. either delegates to ``why(...)`` when ``--why`` is given, or feeds the
       facts through the network for ``--method=naive``;
    6. applies ``--filter`` rule documents and ``--negation`` handling, then
       serializes the inferred (or, with ``--closure``, the closure) graph
       when the output format is in ``RDF_SERIALIZATION_FORMATS``.

    Invalid option combinations are reported through ``OptionParser.error``.
    """
    from optparse import OptionParser
    op = OptionParser('usage: %prog [options] factFile1 factFile2 ... factFileN')

    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-naive methods'
             'see --method')

    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples'
             ' along with the original triples.  Otherwise '
             '(the default behavior), serialize only the inferred triples')

    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')

    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=['xml',
                 'TriX',
                 'n3',
                 'pml',
                 'proof-graph',
                 'nt',
                 'rif',
                 'rif-xml',
                 'conflict',
                 'man-owl'],
        help="Serialize the inferred triples and/or original RDF triples to STDOUT "
             "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', "
             "or 'n3') or to print a summary of the conflict set (from the RETE "
             "network) if the value of this option is 'conflict'.  If the the "
             " value is 'rif' or 'rif-xml', Then the rules used for inference "
             "will be serialized as RIF.  If the value is 'pml' and --why is used, "
             " then the PML RDF statements are serialized.  If output is "
             "'proof-graph then a graphviz .dot file of the proof graph is printed. "
             "Finally if the value is 'man-owl', then the RDF facts are assumed "
             "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default")

    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which '
             'classes within the entire OWL/RDF are targetted for serialization'
             '.  Can be used more than once')

    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with with --method=bfp to determine whether or not to '
             'peek into the fact graph to identify predicates that are both '
             'derived and base.  This is expensive for large fact graphs'
             'and is explicitely not used against SPARQL endpoints')

    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which '
             'properties are serialized / extracted.  Can be used more than once')

    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"
             "The default is %default")

    op.add_option(
        '--ddlGraph',
        default=False,
        help="The location of a N3 Data Description document describing the IDB predicates")

    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the RDF document(s) which serve as the initial facts "
             " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', "
             "or 'rdfa'.  The default is %default")

    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety.  A value of 'loose' "
             " means that unsafe rules will be ignored.  A value of 'strict' "
             " will cause a syntax exception upon any unsafe rule.  A value of "
             "'none' (the default) does nothing")

    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help='Used with --dlp to add pD semantics ruleset for semantics not covered '
             'by DLP but can be expressed in definite Datalog Logic Programming'
             ' The default is %default')

    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')

    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This '
             'can be used more than once')

    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network'
             '.  Can be specified more than once')

    # NOTE(review): store_true with default=True means this flag can never be
    # switched off from the command line - confirm whether that is intended.
    op.add_option(
        '-d', '--debug', action='store_true', default=True,
        help='Include debugging output')

    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose',
                 'defaultBase',
                 'defaultDerived',
                 'harsh'],
        help='Used with --why to specify whether to: *not* check if predicates are '
             ' both derived and base (loose), if they are, mark as derived (defaultDerived) '
             'or as base (defaultBase) predicates, else raise an exception (harsh)')

    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for query.  '
             'One of: gms, sld, bfp, naive')

    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help='Used with --why to determine whether to fetch all answers or just '
             'the first')

    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultDerived to specify which clashing '
             'predicate will be designated as a base predicate')

    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultBase to specify which clashing '
             'predicate will be designated as a derived predicate')

    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help='Used with --why to explicitely specify a hybrid predicate (in both '
             ' IDB and EDB) ')

    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate shouldnt have its '
             'magic sets calculated')

    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as a filter (entailments do not particpate in network)')

    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from "
             "the rule graph.  The default is %default")

    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a "
             "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations")

    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default')

    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help='Indicates that the sole argument is the URI of a SPARQL endpoint to query')

    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The path to an OWL RDF/XML graph to use DLP to extract rules from '
             '(other wise, fact graph(s) are used)  ')

    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the OWL RDF/XML graph specified via --ontology.  The default is %default")

    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help='The path to an N3 document associating SPARQL FILTER templates to '
             'rule builtins')

    op.add_option(
        '--negation',
        action='store_true',
        default=False,
        help='Extract negative rules?')

    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    # Reject unusable option combinations up front with a usage error
    # (the --stdin check used to be an ``assert``, which ``python -O`` strips).
    if options.sparqlEndpoint and options.stdin:
        op.error("Cannot use --stdin with --sparqlEndpoint")
    if options.sparqlEndpoint and not facts:
        op.error("--sparqlEndpoint requires the endpoint URI as an argument")

    # NormalFormReduction is needed by both --normalForm and --dlp.  A
    # function-local import makes the name local to the whole function, so
    # importing it only inside the --dlp branch left it unbound
    # (UnboundLocalError) when --normalForm was used.
    from FuXi.DLP.DLNormalization import NormalFormReduction
    from FuXi.Rete.Util import CollapseDictionary

    # Load the user-supplied builtins module once and reuse its filters for
    # every rule document and for the rule store.
    additionalBuiltins = None
    if options.builtins:
        import imp
        # NOTE(review): the module name 'builtins' coincides with a Python 3
        # standard-library module name - confirm this is harmless.
        userFuncs = imp.load_source('builtins', options.builtins)
        additionalBuiltins = userFuncs.ADDITIONAL_FILTERS

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        # Split on the first '=' only so URIs containing '=' stay intact.
        pref, nsUri = nsBind.split('=', 1)
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from ", fileN)
        if options.builtins:
            rs = HornFromN3(fileN, additionalBuiltins=additionalBuiltins)
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)

    ruleSet.nsMapping = nsBinds

    for prefix, uri in nsBinds.items():
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                # NOTE(review): the graph is extended while its owl:imports
                # objects are being iterated - confirm the store tolerates it.
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. ", owlImport)

        if facts:
            for pref, uri in factGraph.namespaces():
                nsBinds[pref] = uri

    if options.stdin:
        factGraph.parse(sys.stdin, format=options.inputFormat)

    # Normalize namespace mappings
    # prune redundant, rdflib-allocated namespace prefix mappings
    new_ns_mgr = NamespaceManager(factGraph)
    for k, v in CollapseDictionary(dict(factGraph.namespaces())).items():
        new_ns_mgr.bind(k, v)
    factGraph.namespace_manager = new_ns_mgr

    if options.normalForm:
        NormalFormReduction(factGraph)

    # No token set is generated for a SPARQL endpoint; bind the name anyway
    # so the later uses do not raise UnboundLocalError.
    workingMemory = None
    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if options.builtins:
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True, additionalBuiltins=additionalBuiltins)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)

    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for qname in options.classes:
                pref, uri = qname.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for qname in options.properties:
                pref, uri = qname.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize and c.isPrimitive():
                    primAnc = [sc for sc in c.subClassOf if sc.isPrimitive()]
                    if len(primAnc) > 1:
                        warnings.warn(
                            "Branches of primitive skeleton taxonomy"
                            " should form trees: %s has %s primitive parents: %s" % (
                                c.qname, len(primAnc), primAnc),
                            UserWarning, 1)
                    children = list(c.subSumpteeIds())
                    for child in children:
                        # Distinct comprehension variable: reusing ``c`` would
                        # clobber the outer loop variable on Python 2.
                        disjointIds = [dc.identifier
                                       for dc in Class(child).disjointWith]
                        for otherChild in [o for o in children if o is not child]:
                            if otherChild not in disjointIds:
                                warnings.warn(
                                    "Primitive children (of %s) " % (c.qname) +
                                    "must be mutually disjoint: %s and %s" % (
                                        Class(child).qname,
                                        Class(otherChild).qname),
                                    UserWarning, 1)
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    if options.why:
        why(options, factGraph, network, nsBinds, ruleSet, workingMemory)
    elif options.method == 'naive':
        if workingMemory is None:
            # Only reachable with --sparqlEndpoint, where no token set exists.
            op.error("--method=naive without --why cannot be used with "
                     "--sparqlEndpoint")
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds" % sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milli seconds" % sTime
        print("Time to calculate closure on working memory: ", sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    # NOTE(review): --method only accepts 'gms', 'bfp' or 'naive' (see its
    # ``choices`` above), so this condition can currently never be true.
    if options.negation and network.negRules and options.method in [
            'both', 'bottomUp']:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print(
            "Time to calculate stratified, stable model"
            " (inferred %s facts): %s" % (rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(cGraph.serialize(destination=None,
                               format=options.output,
                               base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(network.inferredFacts.serialize(destination=None,
                                              format=options.output,
                                              base=None))
Exemple #8
0
def main():
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')
    op.add_option('--why',
                  default=None,
                  help='Specifies the goals to solve for')
    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
        ' along with the original triples.  Otherwise ' +
        '(the default behavior), serialize only the inferred triples')
    op.add_option(
        '--naive',
        action='store_true',
        default=False,
        help='Naively perform forward chaining over rules and facts using the '
        + 'RETE network')
    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')
    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=[
            'xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml',
            'conflict', 'man-owl'
        ],
        help=
        "Serialize the inferred triples and/or original RDF triples to STDOUT "
        +
        "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', " +
        "or 'n3') or to print a summary of the conflict set (from the RETE " +
        "network) if the value of this option is 'conflict'.  If the the " +
        " value is 'rif' or 'rif-xml', Then the rules used for inference " +
        "will be serialized as RIF.  If the value is 'pml' and --why is used, "
        + " then the PML RDF statements are serialized.  If output is " +
        "'proof-graph then a graphviz .dot file of the proof graph is printed. "
        +
        "Finally if the value is 'man-owl', then the RDF facts are assumed " +
        "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default"
    )
    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
        'classes within the entire OWL/RDF are targetted for serialization' +
        '.  Can be used more than once')
    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used to determine whether or not to ' +
        'peek into the fact graph to identify predicates that are both ' +
        'derived and base.  This is expensive for large fact graphs' +
        'and is explicitely not used against SPARQL endpoints')
    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
        'properties are serialized / extracted.  Can be used more than once')
    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help=
        "Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"
        + "The default is %default")
    op.add_option(
        '--ddlGraph',
        default=False,
        help=
        "The location of a N3 Data Description document describing the IDB predicates"
    )
    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the RDF document(s) which serve as the initial facts " +
        " for the RETE network. One of 'xml','n3','trix', 'nt', " +
        "or 'rdfa'.  The default is %default")
    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety.  A value of 'loose' " +
        " means that unsafe rules will be ignored.  A value of 'strict' " +
        " will cause a syntax exception upon any unsafe rule.  A value of " +
        "'none' (the default) does nothing")
    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help=
        'Used with --dlp to add pD semantics ruleset for semantics not covered '
        + 'by DLP but can be expressed in definite Datalog Logic Programming' +
        ' The default is %default')
    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help=
        'Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default '
    )
    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This '
        + 'can be used more than once')
    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network'
        + '.  Can be specified more than once')
    op.add_option('-d',
                  '--debug',
                  action='store_true',
                  default=False,
                  help='Include debugging output')
    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'],
        help=
        'Used with --why to specify whether to: *not* check if predicates are '
        +
        ' both derived and base (loose), if they are, mark as derived (defaultDerived) '
        +
        'or as base (defaultBase) predicates, else raise an exception (harsh)')
    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help=
        'Used with --why to determine whether to fetch all answers or just ' +
        'the first')
    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultDerived to specify which clashing '
        + 'predicate will be designated as a base predicate')
    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultBase to specify which clashing ' +
        'predicate will be designated as a derived predicate')
    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help=
        'Used with --why to explicitely specify a hybrid predicate (in both ' +
        ' IDB and EDB) ')

    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate shouldnt have its '
        + 'magic sets calculated')
    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The Notation 3 documents to use as a filter (entailments do not particpate in network)'
    )
    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from "
        + "the rule graph.  The default is %default")
    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a " +
        "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations"
    )
    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help=
        'Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default'
    )
    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help=
        'Indicates that the sole argument is the URI of a SPARQL endpoint to query'
    )

    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
        '(other wise, fact graph(s) are used)  ')

    op.add_option(
        '--ruleFormat',
        default='n3',
        dest='ruleFormat',
        metavar='RULE_FORMAT',
        choices=['n3', 'rif'],
        help=
        "The format of the rules to parse ('n3', 'rif').  The default is %default"
    )

    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the OWL RDF/XML graph specified via --ontology.  The default is %default"
    )

    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help=
        'The path to an N3 document associating SPARQL FILTER templates to ' +
        'rule builtins')
    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        pref, nsUri = nsBind.split('=')
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQL', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print >> sys.stderr, "Parsing RDF facts from ", fileN
        if options.builtins:
            import imp
            userFuncs = imp.load_source('builtins', options.builtins)
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
            nsBinds.update(rs.nsMapping)
        elif options.ruleFormat == 'rif':
            try:
                from FuXi.Horn.RIFCore import RIFCoreParser
                rif_parser = RIFCoreParser(location=fileN,
                                           debug=options.debug,
                                           nsBindings=nsBinds)
                rs, facts = rif_parser.getRuleset()
            except ImportError, e:
                raise Exception(
                    "Missing 3rd party libraries for RIF processing: %s" % e)
        else:
            rs = HornFromN3(fileN)
            nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)