Esempio n. 1
0
def gms(goals, noMagic, factGraph, ruleSet, defaultBasePreds,
        defaultDerivedPreds, network, workingMemory, strictCheck):
    magicRuleNo = 0
    magicSeeds = []
    for goal in goals:
        goalSeed = AdornLiteral(goal).makeMagicPred()
        print("Magic seed fact (used in bottom-up evaluation)", goalSeed)
        magicSeeds.append(goalSeed.toRDFTuple())
    if noMagic:
        print("Predicates whose magic sets will not be calculated")
        for p in noMagic:
            print("\t", factGraph.qname(p))
    for rule in MagicSetTransformation(
            factGraph, ruleSet, goals, derivedPreds=[],
            strictCheck=strictCheck,
            defaultPredicates=(defaultBasePreds, defaultDerivedPreds),
            noMagic=noMagic):
        magicRuleNo += 1
        network.buildNetworkFromClause(rule)
    if len(list(ruleSet)):
        print("reduction in size of program: %s (%s -> %s clauses)" % (
              100 - (float(magicRuleNo) / float(len(ruleSet))) * 100,
              len(list(ruleSet)), magicRuleNo))
    start = time.time()
    network.feedFactsToAdd(generateTokenSet(magicSeeds))
    if not [rule for rule in factGraph.adornedProgram if len(rule.sip)]:
        warnings.warn("Using GMS sideways information strategy with no " +
                      "information to pass from query.  Falling back to " +
                      "naive method over given facts and rules")
        network.feedFactsToAdd(workingMemory)
    sTime = time.time() - start
    if sTime > 1:
        sTimeStr = "%s seconds" % sTime
    else:
        sTime = sTime * 1000
        sTimeStr = "%s milli seconds" % sTime
    print("Time to calculate closure on working memory: ", sTimeStr)
Esempio n. 2
0
def main():
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')

    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-naive methods' +
        'see --method')

    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
        ' along with the original triples.  Otherwise ' +
        '(the default behavior), serialize only the inferred triples')

    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')

    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=[
            'xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml',
            'conflict', 'man-owl'
        ],
        help=
        "Serialize the inferred triples and/or original RDF triples to STDOUT "
        +
        "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', "
        +
        "or 'n3') or to print a summary of the conflict set (from the RETE " +
        "network) if the value of this option is 'conflict'.  If the the " +
        " value is 'rif' or 'rif-xml', Then the rules used for inference " +
        "will be serialized as RIF.  If the value is 'pml' and --why is used, "
        + " then the PML RDF statements are serialized.  If output is " +
        "'proof-graph then a graphviz .dot file of the proof graph is printed. "
        +
        "Finally if the value is 'man-owl', then the RDF facts are assumed " +
        "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default"
    )

    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
        'classes within the entire OWL/RDF are targetted for serialization' +
        '.  Can be used more than once')

    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with with --method=bfp to determine whether or not to ' +
        'peek into the fact graph to identify predicates that are both ' +
        'derived and base.  This is expensive for large fact graphs' +
        'and is explicitely not used against SPARQL endpoints')

    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
        'properties are serialized / extracted.  Can be used more than once')

    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help=
        "Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"
        + "The default is %default")

    op.add_option(
        '--ddlGraph',
        default=False,
        help=
        "The location of a N3 Data Description document describing the IDB predicates"
    )

    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the RDF document(s) which serve as the initial facts " +
        " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', " +
        "or 'rdfa'.  The default is %default")

    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety.  A value of 'loose' " +
        " means that unsafe rules will be ignored.  A value of 'strict' " +
        " will cause a syntax exception upon any unsafe rule.  A value of " +
        "'none' (the default) does nothing")

    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help=
        'Used with --dlp to add pD semantics ruleset for semantics not covered '
        + 'by DLP but can be expressed in definite Datalog Logic Programming' +
        ' The default is %default')

    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help=
        'Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default '
    )

    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This '
        + 'can be used more than once')

    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network'
        + '.  Can be specified more than once')

    op.add_option('-d',
                  '--debug',
                  action='store_true',
                  default=True,
                  help='Include debugging output')

    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'],
        help=
        'Used with --why to specify whether to: *not* check if predicates are '
        +
        ' both derived and base (loose), if they are, mark as derived (defaultDerived) '
        +
        'or as base (defaultBase) predicates, else raise an exception (harsh)')

    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for query.  '
        + 'One of: gms, sld, bfp, naive')

    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help=
        'Used with --why to determine whether to fetch all answers or just ' +
        'the first')

    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultDerived to specify which clashing '
        + 'predicate will be designated as a base predicate')

    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultBase to specify which clashing ' +
        'predicate will be designated as a derived predicate')

    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help=
        'Used with --why to explicitely specify a hybrid predicate (in both ' +
        ' IDB and EDB) ')

    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate shouldnt have its '
        + 'magic sets calculated')

    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The Notation 3 documents to use as a filter (entailments do not particpate in network)'
    )

    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from "
        + "the rule graph.  The default is %default")

    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a " +
        "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations"
    )

    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help=
        'Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default'
    )

    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help=
        'Indicates that the sole argument is the URI of a SPARQL endpoint to query'
    )

    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
        '(other wise, fact graph(s) are used)  ')

    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the OWL RDF/XML graph specified via --ontology.  The default is %default"
    )

    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help=
        'The path to an N3 document associating SPARQL FILTER templates to ' +
        'rule builtins')

    op.add_option('--negation',
                  action='store_true',
                  default=False,
                  help='Extract negative rules?')

    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        pref, nsUri = nsBind.split('=')
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from ", fileN)
        if options.builtins:
            import imp
            userFuncs = imp.load_source('builtins', options.builtins)
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)
        #ruleGraph.parse(fileN, format='n3')

    ruleSet.nsMapping = nsBinds

    for prefix, uri in list(nsBinds.items()):
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. ", owlImport)

    if not options.sparqlEndpoint and facts:
        for pref, uri in factGraph.namespaces():
            nsBinds[pref] = uri

    if options.stdin:
        assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint"
        factGraph.parse(sys.stdin, format=options.inputFormat)

    #Normalize namespace mappings
    #prune redundant, rdflib-allocated namespace prefix mappings
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in list(
            CollapseDictionary(
                dict([(k, v) for k, v in factGraph.namespaces()])).items()):
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    if options.normalForm:
        NormalFormReduction(factGraph)

    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if options.builtins:
        import imp
        userFuncs = imp.load_source('builtins', options.builtins)
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        from FuXi.DLP.DLNormalization import NormalFormReduction
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)
    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                pref, uri = c.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                pref, uri = p.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [
                            sc for sc in c.subClassOf if sc.isPrimitive()
                        ]
                        if len(primAnc) > 1:
                            warnings.warn(
                                "Branches of primitive skeleton taxonomy" +
                                " should form trees: %s has %s primitive parents: %s"
                                % (c.qname, len(primAnc), primAnc),
                                UserWarning, 1)
                        children = [desc for desc in c.subSumpteeIds()]
                        for child in children:
                            for otherChild in [
                                    o for o in children if o is not child
                            ]:
                                if not otherChild in [
                                        c.identifier
                                        for c in Class(child).disjointWith
                                ]:  # and \
                                    warnings.warn(
                                        "Primitive children (of %s) " % (c.qname) + \
                                        "must be mutually disjoint: %s and %s" % (
                                    Class(child).qname, Class(otherChild).qname), UserWarning, 1)
                # if not isinstance(c.identifier, BNode):
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds = []
    if options.why:
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                 format='n3')
        factGraph.templateMap = \
            dict([(pred, template)
                      for pred, _ignore, template in
                            builtinTemplateGraph.triples(
                                (None,
                                 TEMPLATES.filterTemplate,
                                 None))])
        goals = []
        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prologue:
            query.prologue = Prologue(None, [])
            query.prologue.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in list(nsBinds.items()):
                if prefix not in query.prologue.prefixBindings:
                    query.prologue.prefixBindings[prefix] = nsInst
        print("query.prologue", query.prologue)
        print("query.query", query.query)
        print("query.query.whereClause", query.query.whereClause)
        print("query.query.whereClause.parsedGraphPattern",
              query.query.whereClause.parsedGraphPattern)
        goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern(
            query.query.whereClause.parsedGraphPattern,
            query.prologue).patterns])
        # dPreds=[]# p for s, p, o in goals ]
        # print("goals", goals)
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        # topDownDerivedPreds  = []
        defaultBasePreds = []
        defaultDerivedPreds = set()
        hybridPredicates = []
        mapping = dict(newNsMgr.namespaces())
        for edb in options.edb:
            pref, uri = edb.split(':')
            defaultBasePreds.append(URIRef(mapping[pref] + uri))
        noMagic = []
        for pred in options.noMagic:
            pref, uri = pred.split(':')
            noMagic.append(URIRef(mapping[pref] + uri))
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds = IdentifyDerivedPredicates(
                ddlGraph, Graph(), ruleSet)
        else:
            for idb in options.idb:
                pref, uri = idb.split(':')
                defaultDerivedPreds.add(URIRef(mapping[pref] + uri))
            defaultDerivedPreds.update(
                set([p == RDF.type and o or p for s, p, o in goals]))
            for hybrid in options.hybridPredicate:
                pref, uri = hybrid.split(':')
                hybridPredicates.append(URIRef(mapping[pref] + uri))

        if options.method == 'gms':
            for goal in goals:
                goalSeed = AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation)",
                      goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t", factGraph.qname(p))
            for rule in MagicSetTransformation(
                    factGraph,
                    ruleSet,
                    goals,
                    derivedPreds=bottomUpDerivedPreds,
                    strictCheck=nameMap[options.strictness],
                    defaultPredicates=(defaultBasePreds, defaultDerivedPreds),
                    noMagic=noMagic):
                magicRuleNo += 1
                network.buildNetworkFromClause(rule)
            if len(list(ruleSet)):
                print("reduction in size of program: %s (%s -> %s clauses)" %
                      (100 -
                       (float(magicRuleNo) / float(len(list(ruleSet)))) * 100,
                       len(list(ruleSet)), magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not [
                    rule for rule in factGraph.adornedProgram if len(rule.sip)
            ]:
                warnings.warn(
                    "Using GMS sideways information strategy with no " +
                    "information to pass from query.  Falling back to " +
                    "naive method over given facts and rules")
                network.feedFactsToAdd(workingMemory)
            sTime = time.time() - start
            if sTime > 1:
                sTimeStr = "%s seconds" % sTime
            else:
                sTime = sTime * 1000
                sTimeStr = "%s milli seconds" % sTime
            print("Time to calculate closure on working memory: ", sTimeStr)

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            if options.builtinTemplates:
                builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                     format='n3')
                builtinDict = dict([
                    (pred, template) for pred, _ignore, template in
                    builtinTemplateGraph.triples((None,
                                                  TEMPLATES.filterTemplate,
                                                  None))
                ])
            else:
                builtinDict = None
            topDownStore = TopDownSPARQLEntailingStore(
                factGraph.store,
                factGraph,
                idb=ruleSet,
                DEBUG=options.debug,
                derivedPredicates=topDownDPreds,
                templateMap=builtinDict,
                nsBindings=network.nsMap,
                identifyHybridPredicates=options.hybrid
                if options.method == 'bfp' else False,
                hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal) for goal in goals],
            #                         targetGraph)
            # query = queryLiteral.asSPARQL()
            # print("Goal to solve ", query)
            sTime = time.time() - start
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                sTime = time.time() - start
                if sTime > 1:
                    sTimeStr = "%s seconds" % sTime
                else:
                    sTime = sTime * 1000
                    sTimeStr = "%s milli seconds" % sTime
                print("Time to reach answer ground goal answer of %s: %s" %
                      (result.askAnswer[0], sTimeStr))
            else:
                for rt in result:
                    sTime = time.time() - start
                    if sTime > 1:
                        sTimeStr = "%s seconds" % sTime
                    else:
                        sTime = sTime * 1000
                        sTimeStr = "%s milli seconds" % sTime
                    if options.firstAnswer:
                        break
                    print(
                        "Time to reach answer %s via top-down SPARQL sip strategy: %s"
                        % (rt, sTimeStr))
            if options.output == 'conflict' and options.method == 'bfp':
                for _network, _goal in topDownStore.queryNetworks:
                    print(network, _goal)
                    _network.reportConflictSet(options.debug)
                for query in topDownStore.edbQueries:
                    print(query.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds" % sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milli seconds" % sTime
        print("Time to calculate closure on working memory: ", sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    if options.negation and network.negRules and options.method in [
            'both', 'bottomUp'
    ]:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print(
            "Time to calculate stratified, stable model (inferred %s facts): %s"
            % (rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(
            cGraph.serialize(destination=None,
                             format=options.output,
                             base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(
            network.inferredFacts.serialize(destination=None,
                                            format=options.output,
                                            base=None))
Esempio n. 3
0
def main():
    from optparse import OptionParser
    op = OptionParser(
      'usage: %prog [options] factFile1 factFile2 ... factFileN')
    op.add_option('--why',
                  default=None,
      help='Specifies the goals to solve for using the non-niave methods' +
              'see --method')
    op.add_option('--closure',
                  action='store_true',
                  default=False,
      help='Whether or not to serialize the inferred triples' +
             ' along with the original triples.  Otherwise ' +
              '(the default behavior), serialize only the inferred triples')
    op.add_option('--imports',
                action='store_true',
                default=False,
    help='Whether or not to follow owl:imports in the fact graph')
    op.add_option('--output',
                  default='n3',
                  metavar='RDF_FORMAT',
                  choices=['xml',
                             'TriX',
                             'n3',
                             'pml',
                             'proof-graph',
                             'nt',
                             'rif',
                             'rif-xml',
                             'conflict',
                             'man-owl'],
      help="Serialize the inferred triples and/or original RDF triples to STDOUT "+
             "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', "+
             "or 'n3') or to print a summary of the conflict set (from the RETE "+
             "network) if the value of this option is 'conflict'.  If the the "+
             " value is 'rif' or 'rif-xml', Then the rules used for inference "+
             "will be serialized as RIF.  If the value is 'pml' and --why is used, "+
             " then the PML RDF statements are serialized.  If output is "+
             "'proof-graph then a graphviz .dot file of the proof graph is printed. "+
             "Finally if the value is 'man-owl', then the RDF facts are assumed "+
             "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default")
    op.add_option('--class',
                  dest='classes',
                  action='append',
                  default=[],
                  metavar='QNAME',
      help='Used with --output=man-owl to determine which '+
             'classes within the entire OWL/RDF are targetted for serialization'+
             '.  Can be used more than once')
    op.add_option('--hybrid',
                  action='store_true',
                  default=False,
      help='Used with with --method=bfp to determine whether or not to '+
             'peek into the fact graph to identify predicates that are both '+
             'derived and base.  This is expensive for large fact graphs'+
             'and is explicitely not used against SPARQL endpoints')
    op.add_option('--property',
                  action='append',
                  dest='properties',
                  default=[],
                  metavar='QNAME',
      help='Used with --output=man-owl or --extract to determine which '+
             'properties are serialized / extracted.  Can be used more than once')
    op.add_option('--normalize',
                  action='store_true',
                  default=False,
      help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"+
      "The default is %default")
    op.add_option('--ddlGraph',
                default=False,
      help="The location of a N3 Data Description document describing the IDB predicates")
    op.add_option('--input-format',
                  default='xml',
                  dest='inputFormat',
                  metavar='RDF_FORMAT',
                  choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
      help="The format of the RDF document(s) which serve as the initial facts "+
             " for the RETE network. One of 'xml','n3','trix', 'nt', "+
             "or 'rdfa'.  The default is %default")
    op.add_option('--safety',
                  default='none',
                  metavar='RULE_SAFETY',
                  choices=['loose', 'strict', 'none'],
      help="Determines how to handle RIF Core safety.  A value of 'loose' "+
             " means that unsafe rules will be ignored.  A value of 'strict' "+
             " will cause a syntax exception upon any unsafe rule.  A value of "+
             "'none' (the default) does nothing")
    op.add_option('--pDSemantics',
                  action='store_true',
                  default=False,
      help='Used with --dlp to add pD semantics ruleset for semantics not covered '+
      'by DLP but can be expressed in definite Datalog Logic Programming'+
      ' The default is %default')
    op.add_option('--stdin',
                  action='store_true',
                  default=False,
      help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')
    op.add_option('--ns',
                  action='append',
                  default=[],
                  metavar="PREFIX=URI",
      help='Register a namespace binding (QName prefix to a base URI).  This '+
             'can be used more than once')
    op.add_option('--rules',
                  default=[],
                  action='append',
                  metavar='PATH_OR_URI',
      help='The Notation 3 documents to use as rulesets for the RETE network'+
      '.  Can be specified more than once')
    op.add_option('-d', '--debug', action='store_true', default=False,
      help='Include debugging output')
    op.add_option('--strictness',
                  default='defaultBase',
                  metavar='DDL_STRICTNESS',
                  choices=['loose',
                             'defaultBase',
                             'defaultDerived',
                             'harsh'],
      help='Used with --why to specify whether to: *not* check if predicates are '+
      ' both derived and base (loose), if they are, mark as derived (defaultDerived) '+
      'or as base (defaultBase) predicates, else raise an exception (harsh)')
    op.add_option('--method',
                  default='naive',
                  metavar='reasoning algorithm',
                  choices=['gms', 'bfp', 'naive'],
      help='Used with --why to specify how to evaluate answers for query.  '+
      'One of: gms,bfp,naive')
    op.add_option('--firstAnswer',
                  default=False,
                  action='store_true',
      help='Used with --why to determine whether to fetch all answers or just '+
      'the first')
    op.add_option('--edb',
                  default=[],
                  action='append',
                  metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
      help='Used with --why/--strictness=defaultDerived to specify which clashing '+
      'predicate will be designated as a base predicate')
    op.add_option('--idb',
                  default=[],
                  action='append',
                  metavar='INTENSIONAL_DB_PREDICATE_QNAME',
      help='Used with --why/--strictness=defaultBase to specify which clashing '+
      'predicate will be designated as a derived predicate')
    op.add_option('--hybridPredicate',
                default=[],
                action='append',
                metavar='PREDICATE_QNAME',
    help='Used with --why to explicitely specify a hybrid predicate (in both '+
           ' IDB and EDB) ')

    op.add_option('--noMagic',
                  default=[],
                  action='append',
                  metavar='DB_PREDICATE_QNAME',
      help='Used with --why to specify that the predicate shouldnt have its '+
      'magic sets calculated')
    op.add_option('--filter',
                  action='append',
                  default=[],
                  metavar='PATH_OR_URI',
      help='The Notation 3 documents to use as a filter (entailments do not particpate in network)')
    op.add_option('--ruleFacts',
                  action='store_true',
                  default=False,
      help="Determines whether or not to attempt to parse initial facts from "+
      "the rule graph.  The default is %default")
    op.add_option('--builtins',
                  default=False,
                  metavar='PATH_TO_PYTHON_MODULE',
      help="The path to a python module with function definitions (and a "+
      "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations")
    op.add_option('--dlp',
                  action='store_true',
                  default=False,
      help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default')
    op.add_option('--sparqlEndpoint',
                action='store_true',
                default=False,
    help='Indicates that the sole argument is the URI of a SPARQL endpoint to query')

    op.add_option('--ontology',
                  action='append',
                  default=[],
                  metavar='PATH_OR_URI',
      help='The path to an OWL RDF/XML graph to use DLP to extract rules from '+
      '(other wise, fact graph(s) are used)  ')

    op.add_option('--ruleFormat',
        default='n3',
        dest='ruleFormat',
        metavar='RULE_FORMAT',
        choices=['n3', 'rif'],
        help="The format of the rules to parse ('n3', 'rif').  The default is %default")

    op.add_option('--ontologyFormat',
                default='xml',
                dest='ontologyFormat',
                metavar='RDF_FORMAT',
                choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
    help="The format of the OWL RDF/XML graph specified via --ontology.  The default is %default")

    op.add_option('--builtinTemplates',
                  default=None,
                  metavar='N3_DOC_PATH_OR_URI',
      help='The path to an N3 document associating SPARQL FILTER templates to '+
      'rule builtins')
    op.add_option('--negation',
                  action='store_true',
                  default=False,
      help='Extract negative rules?')
    op.add_option('--normalForm',
                  action='store_true',
                  default=False,
      help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        pref, nsUri = nsBind.split('=')
        nsBinds[pref]=nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQL', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from %s" % fileN)
        if options.builtins:
            import imp
            userFuncs = imp.load_source('builtins', options.builtins)
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
            nsBinds.update(rs.nsMapping)
        elif options.ruleFormat == 'rif':
            try:
                from FuXi.Horn.RIFCore import RIFCoreParser
                rif_parser = RIFCoreParser(location=fileN, debug=options.debug)
                rs = rif_parser.getRuleset()
            except ImportError:
                raise Exception(
                    "Missing 3rd party libraries for RIF processing"
                )
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)
        #ruleGraph.parse(fileN,format='n3')

    ruleSet.nsMapping = nsBinds

    for prefix, uri in list(nsBinds.items()):
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. %s" % owlImport)

    if not options.sparqlEndpoint and facts:
        for pref, uri in factGraph.namespaces():
            nsBinds[pref]=uri

    if options.stdin:
        assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint"
        factGraph.parse(sys.stdin, format=options.inputFormat)

    #Normalize namespace mappings
    #prune redundant, rdflib-allocated namespace prefix mappings
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in list(CollapseDictionary(dict([(k, v)
                                    for k, v in factGraph.namespaces()])).items()):
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    if options.normalForm:
        NormalFormReduction(factGraph)

    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if options.builtins:
        import imp
        userFuncs = imp.load_source('builtins', options.builtins)
        rule_store, rule_graph, network = SetupRuleStore(
                             makeNetwork=True,
                             additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        from FuXi.DLP.DLNormalization import NormalFormReduction
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph=factGraph
        NormalFormReduction(ontGraph)
        dlp=network.setupDescriptionLogicProgramming(
                                 ontGraph,
                                 addPDSemantics=options.pDSemantics,
                                 constructNetwork=False,
                                 ignoreNegativeStratus=options.negation,
                                 safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)
    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                pref, uri = c.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                pref, uri = p.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [sc for sc in c.subClassOf if sc.isPrimitive()]
                        if len(primAnc) > 1:
                            warnings.warn("Branches of primitive skeleton taxonomy" +
                              " should form trees: %s has %s primitive parents: %s" % (
                             c.qname, len(primAnc), primAnc), UserWarning, 1)
                        children = [desc for desc in c.subSumpteeIds()]
                        for child in children:
                            for otherChild in [o for o in children if o is not child]:
                                if not otherChild in [c.identifier
                                          for c in Class(child).disjointWith]:  # and\
                                    warnings.warn("Primitive children (of %s) " % (c.qname) +
                                          "must be mutually disjoint: %s and %s" % (
                                      Class(child).qname,
                                      Class(otherChild).qname), UserWarning, 1)
                # if not isinstance(c.identifier,BNode):
                print(c.__repr__(True))

    if not options.why:
        #Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds=[]
    if options.why:
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                format='n3')
        factGraph.templateMap = \
            dict([(pred, template)
                      for pred, _ignore, template in
                            builtinTemplateGraph.triples(
                                (None,
                                 TEMPLATES.filterTemplate,
                                 None))])
        goals = []
        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prolog:
                query.prolog = Prolog(None, [])
                query.prolog.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in list(nsBinds.items()):
                if prefix not in query.prolog.prefixBindings:
                    query.prolog.prefixBindings[prefix] = nsInst
        goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern(
                                    query.query.whereClause.parsedGraphPattern,
                                    query.prolog).patterns])
        # dPreds=[]# p for s,p,o in goals ]
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        # topDownDerivedPreds  = []
        defaultBasePreds = []
        defaultDerivedPreds = set()
        hybridPredicates = []
        mapping = dict(newNsMgr.namespaces())
        for edb in options.edb:
            pref, uri = edb.split(':')
            defaultBasePreds.append(URIRef(mapping[pref] + uri))
        noMagic = []
        for pred in options.noMagic:
            pref, uri = pred.split(':')
            noMagic.append(URIRef(mapping[pref] + uri))
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds=IdentifyDerivedPredicates(
                                    ddlGraph,
                                    Graph(),
                                    ruleSet)
        else:
            for idb in options.idb:
                pref, uri = idb.split(':')
                defaultDerivedPreds.add(URIRef(mapping[pref] + uri))
            defaultDerivedPreds.update(
                set([p == RDF.type and o or p for s, p, o in goals]))
            for hybrid in options.hybridPredicate:
                pref, uri = hybrid.split(':')
                hybridPredicates.append(URIRef(mapping[pref]+uri))

        if options.method == 'gms':
            for goal in goals:
                goalSeed=AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation) %s" % goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t%s" % factGraph.qname(p))
            for rule in MagicSetTransformation(
                                       factGraph,
                                       ruleSet,
                                       goals,
                                       derivedPreds=bottomUpDerivedPreds,
                                       strictCheck=nameMap[options.strictness],
                                       defaultPredicates=(defaultBasePreds,
                                                          defaultDerivedPreds),
                                       noMagic=noMagic):
                magicRuleNo+=1
                network.buildNetworkFromClause(rule)
            if len(list(ruleSet)):
                print("reduction in size of program: %s (%s -> %s clauses)" % (
                                           100 - (float(magicRuleNo) /
                                                  float(len(list(ruleSet)))
                                                  ) * 100,
                                           len(list(ruleSet)),
                                           magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not [
                rule for rule in factGraph.adornedProgram if len(rule.sip)]:
                warnings.warn(
                    "Using GMS sideways information strategy with no "+
                      "information to pass from query.  Falling back to "+
                      "naive method over given facts and rules")
                network.feedFactsToAdd(workingMemory)
            sTime = time.time() - start
            if sTime > 1:
                sTimeStr = "%s seconds"%sTime
            else:
                sTime = sTime * 1000
                sTimeStr = "%s milli seconds"%sTime
            print("Time to calculate closure on working memory: %s" % sTimeStr)

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            if options.builtinTemplates:
                builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                    format='n3')
                builtinDict = dict([(pred, template)
                              for pred, _ignore, template in
                                    builtinTemplateGraph.triples(
                                        (None,
                                         TEMPLATES.filterTemplate,
                                         None))])
            else:
                builtinDict = None
            topDownStore=TopDownSPARQLEntailingStore(
                            factGraph.store,
                            factGraph,
                            idb=ruleSet,
                            DEBUG=options.debug,
                            derivedPredicates=topDownDPreds,
                            templateMap=builtinDict,
                            nsBindings=network.nsMap,
                            identifyHybridPredicates=options.hybrid \
                                    if options.method == 'bfp' else False,
                            hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal)
            #                                   for goal in goals],
            #                         targetGraph)
            # query = queryLiteral.asSPARQL()
            # print >>sys.stderr, "Goal to solve ", query
            sTime = time.time() - start
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                sTime = time.time() - start
                if sTime > 1:
                    sTimeStr = "%s seconds"%sTime
                else:
                    sTime = sTime * 1000
                    sTimeStr = "%s milli seconds"%sTime
                print("Time to reach answer ground goal answer of %s: %s" % (
                      result.askAnswer[0], sTimeStr))
            else:
                for rt in result:
                    sTime = time.time() - start
                    if sTime > 1:
                        sTimeStr = "%s seconds" % sTime
                    else:
                        sTime = sTime * 1000
                        sTimeStr = "%s milli seconds" % sTime
                    if options.firstAnswer:
                        break
                    print(
                    "Time to reach answer %s via top-down SPARQL sip strategy: %s" % (
                    rt, sTimeStr))
            if options.output == 'conflict' and options.method == 'bfp':
                for _network, _goal in topDownStore.queryNetworks:
                    print(_network, _goal)
                    _network.reportConflictSet(options.debug)
                for query in topDownStore.edbQueries:
                    print(query.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds"%sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milli seconds"%sTime
        print("Time to calculate closure on working memory: %s" % sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    if options.negation and network.negRules and options.method in ['both',
                                                                    'bottomUp']:
        now=time.time()
        rt=network.calculateStratifiedModel(factGraph)
        print("Time to calculate stratified, stable model (inferred %s facts): %s" % (
                                    rt,
                                    time.time()-now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure \
        and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(cGraph.serialize(destination=None,
                               format=options.output,
                               base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(network.inferredFacts.serialize(destination=None,
                                              format=options.output,
                                              base=None))
Esempio n. 4
0
    def MagicOWLProof(self, goals, rules, factGraph, conclusionFile):
        progLen = len(rules)
        magicRuleNo = 0
        dPreds = []
        for rule in AdditionalRules(factGraph):
            rules.append(rule)
        if not GROUND_QUERY and REASONING_STRATEGY != 'gms':
            goalDict = dict([((Variable('SUBJECT'),goalP,goalO),goalS)
                        for goalS,goalP,goalO in goals])
            goals = goalDict.keys()
        assert goals

        if REASONING_STRATEGY == 'gms':
            for rule in MagicSetTransformation(factGraph,
                                               rules,
                                               goals,
                                               dPreds):
                magicRuleNo += 1
                self.network.buildNetworkFromClause(rule)
                self.network.rules.add(rule)
                if DEBUG:
                    print("\t%s" % rule)
            print("rate of reduction in the size of the program: %s " % (
                            100 - (float(magicRuleNo) / float(progLen)) * 100))

        if REASONING_STRATEGY in ['bfp', 'sld']:  # and not GROUND_QUERY:
            reasoningAlg = TOP_DOWN_METHOD if REASONING_STRATEGY == 'sld' \
                           else BFP_METHOD
            topDownStore = TopDownSPARQLEntailingStore(
                            factGraph.store,
                            factGraph,
                            idb=rules,
                            DEBUG=DEBUG,
                            nsBindings=nsMap,
                            decisionProcedure=reasoningAlg,
                            identifyHybridPredicates=REASONING_STRATEGY == 'bfp')
            targetGraph = Graph(topDownStore)
            for pref,nsUri in nsMap.items():
                targetGraph.bind(pref,nsUri)
            start = time.time()

            for goal in goals:
                queryLiteral = EDBQuery([BuildUnitermFromTuple(goal)],
                                        factGraph,
                                        None if GROUND_QUERY else [goal[0]])
                query = queryLiteral.asSPARQL()
                print "Goal to solve ", query
                rt = targetGraph.query(query, initNs=nsMap)
                if GROUND_QUERY:
                    self.failUnless(rt.askAnswer[0],"Failed top-down problem")
                else:
                    if (goalDict[goal]) not in rt or DEBUG:
                        for network,_goal in topDownStore.queryNetworks:
                            print network,_goal
                            network.reportConflictSet(True)
                        for query in topDownStore.edbQueries:
                            print query.asSPARQL()
                    self.failUnless((goalDict[goal]) in rt,
                                    "Failed top-down problem")
            sTime = time.time() - start
            if sTime > 1:
                sTimeStr = "%s seconds" % sTime
            else:
                sTime = sTime * 1000
                sTimeStr = "%s milli seconds" % sTime
            return sTimeStr
        elif REASONING_STRATEGY == 'gms':
            for goal in goals:
                adornedGoalSeed = AdornLiteral(goal).makeMagicPred()
                goal = adornedGoalSeed.toRDFTuple()
                if DEBUG:
                    print("Magic seed fact %s" % adornedGoalSeed)
                factGraph.add(goal)
            timing = self.calculateEntailments(factGraph)
            for goal in goals:
                # self.failUnless(goal in self.network.inferredFacts or goal in factGraph,
                #                 "Failed GMS query")
                if goal not in self.network.inferredFacts and goal not in factGraph:
                    print("missing triple %s" % (pformat(goal)))
                    pprint(list(factGraph.adornedProgram))
                    # from FuXi.Rete.Util import renderNetwork
                    # dot=renderNetwork(self.network,self.network.nsMap).write_jpeg('test-fail.jpeg')
                    self.network.reportConflictSet(True)
                    raise  # Exception ("Failed test: "+feature)
                else:
                    print("=== Passed! ===")
            return timing
Esempio n. 5
0
    def MagicOWLProof(self, goals, rules, factGraph, conclusionFile):
        progLen = len(rules)
        magicRuleNo = 0
        dPreds = []
        for rule in AdditionalRules(factGraph):
            rules.append(rule)
        if not GROUND_QUERY and REASONING_STRATEGY != 'gms':
            goalDict = dict([((Variable('SUBJECT'), goalP, goalO), goalS)
                             for goalS, goalP, goalO in goals])
            goals = goalDict.keys()
        assert goals

        if REASONING_STRATEGY == 'gms':
            for rule in MagicSetTransformation(factGraph,
                                               rules,
                                               goals,
                                               dPreds):
                magicRuleNo += 1
                self.network.buildNetworkFromClause(rule)
                self.network.rules.add(rule)
                if DEBUG:
                    log.debug("\t", rule)
            log.debug("rate of reduction in the size of the program: ",
                      (100 - (float(magicRuleNo) / float(progLen)) * 100))

        if REASONING_STRATEGY in ['bfp', 'sld']:  # and not GROUND_QUERY:
            reasoningAlg = TOP_DOWN_METHOD if REASONING_STRATEGY == 'sld' \
                else BFP_METHOD
            topDownStore = TopDownSPARQLEntailingStore(
                factGraph.store,
                factGraph,
                idb=rules,
                DEBUG=DEBUG,
                nsBindings=nsMap,
                decisionProcedure=reasoningAlg,
                identifyHybridPredicates=REASONING_STRATEGY == 'bfp')
            targetGraph = Graph(topDownStore)
            for pref, nsUri in nsMap.items():
                targetGraph.bind(pref, nsUri)
            start = time.time()

            for goal in goals:
                queryLiteral = EDBQuery([BuildUnitermFromTuple(goal)],
                                        factGraph,
                                        None if GROUND_QUERY else [goal[0]])
                query = queryLiteral.asSPARQL()
                log.debug("Goal to solve ", query)
                rt = targetGraph.query(query, initNs=nsMap)
                if GROUND_QUERY:
                    self.failUnless(rt.askAnswer[0], "Failed top-down problem")
                else:
                    if (goalDict[goal]) not in rt or DEBUG:
                        for network, _goal in topDownStore.queryNetworks:
                            log.debug(network, _goal)
                            network.reportConflictSet(True)
                        for query in topDownStore.edbQueries:
                            log.debug(query.asSPARQL())
                    self.failUnless((goalDict[goal]) in rt,
                                    "Failed top-down problem")
            sTime = time.time() - start
            if sTime > 1:
                sTimeStr = "%s seconds" % sTime
            else:
                sTime = sTime * 1000
                sTimeStr = "%s ms" % sTime
            return sTimeStr
        elif REASONING_STRATEGY == 'gms':
            for goal in goals:
                adornedGoalSeed = AdornLiteral(goal).makeMagicPred()
                goal = adornedGoalSeed.toRDFTuple()
                if DEBUG:
                    log.debug("Magic seed fact ", adornedGoalSeed)
                factGraph.add(goal)
            timing = self.calculateEntailments(factGraph)
            for goal in goals:
                # self.failUnless(goal in self.network.inferredFacts or goal in factGraph,
                #                 "Failed GMS query")
                if goal not in self.network.inferredFacts and goal not in factGraph:
                    log.debug("missing triple %s" % (pformat(goal)))
                    # print(list(factGraph.adornedProgram))
                    # from FuXi.Rete.Util import renderNetwork
                    # dot = renderNetwork(
                    #   self.network,self.network.nsMap).write_jpeg('test-fail.jpeg')
                    self.network.reportConflictSet(True)
                    log.debug("=== Failed: %s ====" % pformat(goal))
                else:
                    log.debug("=== Passed! ===")
            return timing
Esempio n. 6
0
def SipStrategy(query,
                sipCollection,
                factGraph,
                derivedPreds,
                bindings={},
                processedRules=None,
                network=None,
                debug=False,
                buildProof=False,
                memoizeMemory=None,
                proofLevel=1):
    """
    Accordingly, we define a sip-strategy for computing the answers to a query 
    expressed using a set of Datalog rules, and a set of sips, one for each 
    adornment of a rule head, as follows...
    
    Each evaluation uses memoization (via Python decorators) but also relies on well-formed 
    rewrites for using semi-naive bottom up method over large SPARQL data.
    
    """
    memoizeMemory = memoizeMemory and memoizeMemory or {}
    queryLiteral = BuildUnitermFromTuple(query)
    processedRules = processedRules and processedRules or set()
    if bindings:
        #There are bindings.  Apply them to the terms in the query
        queryLiteral.ground(bindings)

    if debug:
        print >> sys.stderr, "%sSolving" % ('\t' *
                                            proofLevel), queryLiteral, bindings
    #Only consider ground triple pattern isomorphism with matching bindings
    goalRDFStatement = queryLiteral.toRDFTuple()

    if queryLiteral in memoizeMemory:
        if debug:
            print >> sys.stderr, "%sReturning previously calculated results for "%\
                        ('\t'*proofLevel), queryLiteral
        for answers in memoizeMemory[queryLiteral]:
            yield answers
    elif AlphaNode(goalRDFStatement).alphaNetworkHash(
                                      True,
                                      skolemTerms=bindings.values()) in\
        [AlphaNode(r.toRDFTuple()).alphaNetworkHash(True,
                                                    skolemTerms=bindings.values())
            for r in processedRules
                if AdornLiteral(goalRDFStatement).adornment == \
                   r.adornment]:
        if debug:
            print >> sys.stderr, "%s Goal already processed..."%\
                ('\t'*proofLevel)
    else:
        isGround = literalIsGround(queryLiteral)
        if buildProof:
            ns = NodeSet(goalRDFStatement, network=network, identifier=BNode())
        else:
            ns = None
        adornedProgram = factGraph.adornedProgram
        queryPred = GetOp(queryLiteral)
        if sipCollection is None:
            rules = []
        else:
            #For every rule head matching the query, we invoke the rule,
            #thus determining an adornment, and selecting a sip to follow
            rules = sipCollection.headToRule.get(queryPred, set())
            if None in sipCollection.headToRule:
                #If there are second order rules, we add them
                #since they are a 'wildcard'
                rules.update(sipCollection.headToRule[None])

        #maintained list of rules that haven't been processed before and
        #match the query
        validRules = []

        #each subquery contains values for the bound arguments that are passed
        #through the sip arcs entering the node corresponding to that literal. For
        #each subquery generated, there is a set of answers.
        answers = []

        variableMapping = {}

        #Some TBox queries can be 'joined' together into SPARQL queries against
        #'base' predicates via an RDF dataset
        #These atomic concept inclusion axioms can be evaluated together
        #using a disjunctive operator at the body of a horn clause
        #where each item is a query of the form uniPredicate(?X):
        #Or( uniPredicate1(?X1), uniPredicate2(?X), uniPredicate3(?X),..)
        #In this way massive, conjunctive joins can be 'mediated'
        #between the stated facts and the top-down solver
        @parameterizedPredicate([i for i in derivedPreds])
        def IsAtomicInclusionAxiomRHS(rule, dPreds):
            """
            This is an atomic inclusion axiom with
            a variable (or bound) RHS:  uniPred(?ENTITY)
            """
            bodyList = list(iterCondition(rule.formula.body))
            body = first(bodyList)
            return GetOp(body) not in dPreds and \
                   len(bodyList) == 1 and \
                   body.op == RDF.type

        atomicInclusionAxioms = list(ifilter(IsAtomicInclusionAxiomRHS, rules))
        if atomicInclusionAxioms and len(atomicInclusionAxioms) > 1:
            if debug:
                print >> sys.stderr, "\tCombining atomic inclusion axioms: "
                pprint(atomicInclusionAxioms, sys.stderr)
            if buildProof:
                factStep = InferenceStep(ns, source='some RDF graph')
                ns.steps.append(factStep)

            axioms = [rule.formula.body for rule in atomicInclusionAxioms]

            #attempt to exaustively apply any available substitutions
            #and determine if query if fully ground
            vars = [
                v for v in GetArgs(queryLiteral, secondOrder=True)
                if isinstance(v, Variable)
            ]
            openVars,axioms,_bindings  = \
                    normalizeBindingsAndQuery(vars,
                                              bindings,
                                              axioms)
            if openVars:
                mappings = {}
                #See if we need to do any variable mappings from the query literals
                #to the literals in the applicable rules
                query, rt = EDBQuery(axioms, factGraph, openVars,
                                     _bindings).evaluate(
                                         debug, symmAtomicInclusion=True)
                if buildProof:
                    factStep.groundQuery = subquery
                for ans in rt:
                    if buildProof:
                        factStep.bindings.update(ans)
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (prepMemiozedAns(ans), ns))
                    yield ans, ns
            else:
                #All the relevant derivations have been explored and the result
                #is a ground query we can directly execute against the facts
                if buildProof:
                    factStep.bindings.update(bindings)
                query, rt = EDBQuery(axioms, factGraph, _bindings).evaluate(
                    debug, symmAtomicInclusion=True)
                if buildProof:
                    factStep.groundQuery = subquery
                memoizeMemory.setdefault(queryLiteral, set()).add(
                    (prepMemiozedAns(rt), ns))
                yield rt, ns
            rules = ifilter(lambda i: not IsAtomicInclusionAxiomRHS(i), rules)
        for rule in rules:
            #An exception is the special predicate ph; it is treated as a base
            #predicate and the tuples in it are those supplied for qb by unification.
            headBindings = getBindingsFromLiteral(goalRDFStatement,
                                                  rule.formula.head)
            comboBindings = dict([(k, v) for k, v in itertools.chain(
                bindings.items(), headBindings.items())])
            varMap = rule.formula.head.getVarMapping(queryLiteral)
            if headBindings and\
                [term for term in rule.formula.head.getDistinguishedVariables(True)
                        if varMap.get(term,term) not in headBindings]:
                continue
            subQueryAnswers = []
            dontStop = True
            projectedBindings = comboBindings.copy()
            if debug:
                print >> sys.stderr, "%sProcessing rule"%\
                ('\t'*proofLevel), rule.formula
                if debug and sipCollection:
                    print >> sys.stderr, "Sideways Information Passing (sip) graph for %s: " % queryLiteral
                    print >> sys.stdout, sipCollection.serialize(format='n3')
                    for sip in SIPRepresentation(sipCollection):
                        print >> sys.stderr, sip
            try:
                #Invoke the rule
                if buildProof:
                    step = InferenceStep(ns, rule.formula)
                else:
                    step = None
                for rt,step in\
                  invokeRule([headBindings],
                              iter(iterCondition(rule.formula.body)),
                              rule.sip,
                              (proofLevel + 1,
                               memoizeMemory,
                               sipCollection,
                               factGraph,
                               derivedPreds,
                               processedRules.union([
                                 AdornLiteral(query)])),
                              step=step,
                              debug = debug):
                    if rt:
                        if isinstance(rt, dict):
                            #We received a mapping and must rewrite it via
                            #correlation between the variables in the rule head
                            #and the variables in the original query (after applying
                            #bindings)
                            varMap = rule.formula.head.getVarMapping(
                                queryLiteral)
                            if varMap:
                                rt = MakeImmutableDict(
                                    refactorMapping(varMap, rt))
                            if buildProof:
                                step.bindings = rt
                        else:
                            if buildProof:
                                step.bindings = headBindings
                        validRules.append(rule)
                        if buildProof:
                            ns.steps.append(step)
                        if isGround:
                            yield True, ns
                        else:
                            memoizeMemory.setdefault(queryLiteral, set()).add(
                                (prepMemiozedAns(rt), ns))
                            yield rt, ns

            except RuleFailure, e:
                #Clean up failed antecedents
                if buildProof:
                    if ns in step.antecedents:
                        step.antecedents.remove(ns)
        if not validRules:
            #No rules matching, query factGraph for answers
            successful = False
            if buildProof:
                factStep = InferenceStep(ns, source='some RDF graph')
                ns.steps.append(factStep)
            if not isGround:
                subquery, rt = EDBQuery([queryLiteral], factGraph, [
                    v for v in GetArgs(queryLiteral, secondOrder=True)
                    if isinstance(v, Variable)
                ], bindings).evaluate(debug)
                if buildProof:
                    factStep.groundQuery = subquery
                for ans in rt:
                    successful = True
                    if buildProof:
                        factStep.bindings.update(ans)
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (prepMemiozedAns(ans), ns))
                    yield ans, ns
                if not successful and queryPred not in derivedPreds:
                    #Open query didn't return any results and the predicate
                    #is ostensibly marked as derived predicate, so we have failed
                    memoizeMemory.setdefault(queryLiteral, set()).add(
                        (False, ns))
                    yield False, ns
            else:
                #All the relevant derivations have been explored and the result
                #is a ground query we can directly execute against the facts
                if buildProof:
                    factStep.bindings.update(bindings)

                subquery, rt = EDBQuery([queryLiteral], factGraph,
                                        bindings).evaluate(debug)
                if buildProof:
                    factStep.groundQuery = subquery
                memoizeMemory.setdefault(queryLiteral, set()).add(
                    (prepMemiozedAns(rt), ns))
                yield rt, ns
Esempio n. 7
0
from FuXi.Rete.RuleStore import SetupRuleStore
from FuXi.Rete.Util import generateTokenSet
from FuXi.Horn.HornRules import HornFromN3
from FuXi.Rete.Magic import MagicSetTransformation, AdornLiteral
from FuXi.SPARQL import RDFTuplesToSPARQL

exNs = Namespace('http://dev.w3.org/2000/10/swap/test/cwm/fam.n3#')

rules = HornFromN3('http://dev.w3.org/2000/10/swap/test/cwm/fam-rules.n3')
factGraph = Graph().parse('http://dev.w3.org/2000/10/swap/test/cwm/fam.n3',format='n3')
factGraph.bind(u'ex',exNs)
dPreds = [exNs.ancestor]

rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
network.nsMap = {u'ex':exNs}
closureDeltaGraph=Graph()
network.inferredFacts = closureDeltaGraph

goals = [(exNs.david,exNs.ancestor,Variable('ANCESTOR'))]
for rule in MagicSetTransformation(factGraph,rules,goals,dPreds):
    network.buildNetworkFromClause(rule)    
    # network.rules.add(rule)
    print ("\t", rule)
    
goalLit = AdornLiteral(goals[0])
adornedGoalSeed = goalLit.makeMagicPred()
goal=adornedGoalSeed.toRDFTuple()
print (RDFTuplesToSPARQL([goalLit],factGraph,vars=[Variable('ANCESTOR')])
SELECT ?ANCESTOR {      <http://dev.w3.org/2000/10/swap/test/cwm/fam.n3#david> <http://dev.w3.org/2000/10/swap/test/cwm/fam.n3#ancestor> ?ANCESTOR })