Exemple #1
0
 def test_namespaces_via_manager(self):
     """
     Check that ``NamespaceManager.namespaces`` yields no bindings when the
     manager wraps a graph backed by a bare, abstract ``Store``.
     """
     abstract_store = Store()
     graph = Graph(store=abstract_store)
     manager = NamespaceManager(graph)
     bound = list(manager.namespaces())
     self.assertEqual(bound, [])
Exemple #2
0
    def toURIRef(self, manager: NamespaceManager) -> URIRef:
        """
        Expand the prefixed name held in ``self.name`` into a URI reference.

        :param manager: namespace manager whose ``namespaces()`` bindings are
            searched for the prefix
        :return: ``None`` when the name contains no ``:``; the bound namespace
            concatenated with the local part when the prefix is bound;
            otherwise ``URIRef`` built from the unmodified name
        """
        qname = self.name
        if ":" not in qname:
            return None

        # Split on the first colon only: everything after it is the local part.
        wanted, _, local = qname.partition(':')
        for bound_prefix, namespace in manager.namespaces():
            if bound_prefix == wanted:
                return namespace + local
        # Unknown prefix: treat the whole name as an absolute reference.
        return URIRef(qname)
Exemple #3
0
def get_namespaces(graph):
    """
    Collect the namespace bindings of a graph as a plain dictionary.

    This is best-effort: any exception raised while building or reading the
    namespace manager is printed and an empty dictionary is returned.

    :param graph: graph handed to ``NamespaceManager``
    :return: mapping of prefix to namespace, or ``{}`` on failure
    """
    bindings = {}
    try:
        for entry in NamespaceManager(graph=graph).namespaces():
            bindings[entry[0]] = entry[1]
    except Exception as e:
        print(e)
        # Discard anything gathered before the failure.
        return {}
    return bindings
Exemple #4
0
    def toURIRef(self, manager: NamespaceManager) -> URIRef:
        """
        Expand the prefixed name held in ``self.name`` into a URI reference.

        :param manager: :class:`rdflib.namespace.NamespaceManager` whose
            bindings are searched for the prefix
        :return: ``None`` when the name contains no ``:``; the bound namespace
            concatenated with the local part when the prefix is bound;
            otherwise a :class:`rdflib.URIRef` built from the unmodified name
        """
        qname = self.name
        if ":" not in qname:
            return None

        # Split on the first colon only: everything after it is the local part.
        wanted, _, local = qname.partition(':')
        for bound_prefix, namespace in manager.namespaces():
            if bound_prefix == wanted:
                return namespace + local
        # Unknown prefix: treat the whole name as an absolute reference.
        return URIRef(qname)
Exemple #5
0
    def toURIRef(self, manager: NamespaceManager) -> URIRef:
        """
        Resolve the prefixed name in ``self.name`` against a namespace manager.

        :param manager: :class:`rdflib.namespace.NamespaceManager` used to
            look up the prefix
        :return: ``None`` if the name has no ``:`` separator; the matching
            namespace joined with the local part if the prefix is bound;
            otherwise a :class:`rdflib.URIRef` wrapping the name as-is
        """
        full_name = self.name
        if ":" not in full_name:
            return None

        # Only the first colon separates prefix from local part.
        prefix_part, _, local_part = full_name.partition(':')
        for candidate, namespace in manager.namespaces():
            if candidate == prefix_part:
                return namespace + local_part
        # No binding matched the prefix.
        return URIRef(full_name)
Exemple #6
0
class BaseGraph(Graph):
    """
    ``Graph`` subclass with namespace-binding helpers and coroutine wrappers
    that run serialization and queries in an executor.
    """

    def __init__(self, *args, **kw):
        """Initialise the underlying graph and remember the event loop."""
        super().__init__(*args, **kw)
        self.loop = asyncio.get_event_loop()

    def iNsBind(self):
        """
        Create ``self.iNs`` and bind every entry of ``nsBindings`` in it.

        A value in ``nsBindings`` may be a single URI or a list of URIs; each
        URI is bound under the same prefix key.
        """
        self.iNs = NamespaceManager(self)

        # Bind some useful things in the NS manager
        for ns, uri in nsBindings.items():
            uris = uri if isinstance(uri, list) else [uri]
            for u in uris:
                self.iNs.bind(ns, Namespace(u))

    @property
    def nameSpaces(self):
        """List of the bindings held by ``self.iNs`` (requires ``iNsBind``)."""
        return list(self.iNs.namespaces())

    def _serial(self, fmt):
        """Serialize the graph in format *fmt* and return the bytes written."""
        buff = io.BytesIO()
        self.serialize(buff, format=fmt)
        # getvalue() returns the whole buffer regardless of stream position.
        return buff.getvalue()

    async def ttlize(self):
        """Serialize as ``'ttl'`` in the loop's default executor."""
        return await self.loop.run_in_executor(None, self._serial, 'ttl')

    async def xmlize(self):
        """Serialize as ``'pretty-xml'`` in the loop's default executor."""
        return await self.loop.run_in_executor(None, self._serial,
                                               'pretty-xml')

    async def queryAsync(self, query, initBindings=None):
        """
        Run ``self.query`` in the application's executor.

        :param query: query passed through to ``self.query``
        :param initBindings: forwarded as the ``initBindings`` keyword
        :return: whatever ``self.query`` returns
        """
        def runQuery(q, bindings):
            return self.query(q, initBindings=bindings)

        return await self.loop.run_in_executor(runningApp().executor, runQuery,
                                               query, initBindings)
Exemple #7
0
def _load_user_builtins(path):
    """
    Execute the Python source file at *path* and return it as a module.

    Stands in for ``imp.load_source`` (the ``imp`` module was removed in
    Python 3.12).  The module is created under a private name and is not
    registered in ``sys.modules``, so it cannot clash with the standard
    ``builtins`` module.
    """
    from importlib.machinery import SourceFileLoader
    from importlib.util import module_from_spec, spec_from_loader

    # An explicit SourceFileLoader accepts any file name, not only ``*.py``.
    loader = SourceFileLoader('_fuxi_user_builtins', path)
    spec = spec_from_loader('_fuxi_user_builtins', loader)
    module = module_from_spec(spec)
    loader.exec_module(module)
    return module


def main():
    """
    Command-line entry point.

    Parses the options from ``sys.argv``, loads the fact graph(s) and rule
    sets, builds the inference network, evaluates it with the selected
    ``--method`` (optionally against the ``--why`` query) and prints the
    requested ``--output`` to STDOUT.
    """
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')

    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-naive methods' +
        'see --method')

    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
        ' along with the original triples.  Otherwise ' +
        '(the default behavior), serialize only the inferred triples')

    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')

    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=[
            'xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml',
            'conflict', 'man-owl'
        ],
        help=
        "Serialize the inferred triples and/or original RDF triples to STDOUT "
        +
        "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', "
        +
        "or 'n3') or to print a summary of the conflict set (from the RETE " +
        "network) if the value of this option is 'conflict'.  If the the " +
        " value is 'rif' or 'rif-xml', Then the rules used for inference " +
        "will be serialized as RIF.  If the value is 'pml' and --why is used, "
        + " then the PML RDF statements are serialized.  If output is " +
        "'proof-graph then a graphviz .dot file of the proof graph is printed. "
        +
        "Finally if the value is 'man-owl', then the RDF facts are assumed " +
        "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default"
    )

    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
        'classes within the entire OWL/RDF are targetted for serialization' +
        '.  Can be used more than once')

    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with with --method=bfp to determine whether or not to ' +
        'peek into the fact graph to identify predicates that are both ' +
        'derived and base.  This is expensive for large fact graphs' +
        'and is explicitely not used against SPARQL endpoints')

    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
        'properties are serialized / extracted.  Can be used more than once')

    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help=
        "Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"
        + "The default is %default")

    op.add_option(
        '--ddlGraph',
        default=False,
        help=
        "The location of a N3 Data Description document describing the IDB predicates"
    )

    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the RDF document(s) which serve as the initial facts " +
        " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', " +
        "or 'rdfa'.  The default is %default")

    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety.  A value of 'loose' " +
        " means that unsafe rules will be ignored.  A value of 'strict' " +
        " will cause a syntax exception upon any unsafe rule.  A value of " +
        "'none' (the default) does nothing")

    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help=
        'Used with --dlp to add pD semantics ruleset for semantics not covered '
        + 'by DLP but can be expressed in definite Datalog Logic Programming' +
        ' The default is %default')

    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help=
        'Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default '
    )

    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This '
        + 'can be used more than once')

    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network'
        + '.  Can be specified more than once')

    # NOTE(review): default=True combined with store_true means passing
    # -d/--debug never changes the value; debugging is effectively always on.
    op.add_option('-d',
                  '--debug',
                  action='store_true',
                  default=True,
                  help='Include debugging output')

    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'],
        help=
        'Used with --why to specify whether to: *not* check if predicates are '
        +
        ' both derived and base (loose), if they are, mark as derived (defaultDerived) '
        +
        'or as base (defaultBase) predicates, else raise an exception (harsh)')

    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for query.  '
        + 'One of: gms, sld, bfp, naive')

    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help=
        'Used with --why to determine whether to fetch all answers or just ' +
        'the first')

    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultDerived to specify which clashing '
        + 'predicate will be designated as a base predicate')

    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultBase to specify which clashing ' +
        'predicate will be designated as a derived predicate')

    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help=
        'Used with --why to explicitely specify a hybrid predicate (in both ' +
        ' IDB and EDB) ')

    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate shouldnt have its '
        + 'magic sets calculated')

    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The Notation 3 documents to use as a filter (entailments do not particpate in network)'
    )

    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from "
        + "the rule graph.  The default is %default")

    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a " +
        "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations"
    )

    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help=
        'Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default'
    )

    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help=
        'Indicates that the sole argument is the URI of a SPARQL endpoint to query'
    )

    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
        '(other wise, fact graph(s) are used)  ')

    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the OWL RDF/XML graph specified via --ontology.  The default is %default"
    )

    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help=
        'The path to an N3 document associating SPARQL FILTER templates to ' +
        'rule builtins')

    op.add_option('--negation',
                  action='store_true',
                  default=False,
                  help='Extract negative rules?')

    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        # Split on the first '=' only so URIs containing '=' stay intact.
        pref, nsUri = nsBind.split('=', 1)
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    # Load the user-supplied builtins module once and reuse it below.
    userFuncs = (_load_user_builtins(options.builtins)
                 if options.builtins else None)

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from ", fileN)
        if userFuncs is not None:
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)
        #ruleGraph.parse(fileN, format='n3')

    ruleSet.nsMapping = nsBinds

    for prefix, uri in list(nsBinds.items()):
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. ", owlImport)

    if not options.sparqlEndpoint and facts:
        for pref, uri in factGraph.namespaces():
            nsBinds[pref] = uri

    if options.stdin:
        assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint"
        factGraph.parse(sys.stdin, format=options.inputFormat)

    #Normalize namespace mappings
    #prune redundant, rdflib-allocated namespace prefix mappings
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in list(
            CollapseDictionary(dict(factGraph.namespaces())).items()):
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    # Imported ahead of its first use: a function-level import makes the name
    # local to the whole function, so importing it only inside the --dlp
    # branch further down made the --normalForm call below raise
    # UnboundLocalError.
    from FuXi.DLP.DLNormalization import NormalFormReduction
    if options.normalForm:
        NormalFormReduction(factGraph)

    # NOTE(review): workingMemory is only defined when not querying a SPARQL
    # endpoint; the later feedFactsToAdd(workingMemory) calls would raise
    # NameError in endpoint mode.
    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if userFuncs is not None:
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)
    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                pref, uri = c.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                pref, uri = p.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [
                            sc for sc in c.subClassOf if sc.isPrimitive()
                        ]
                        if len(primAnc) > 1:
                            warnings.warn(
                                "Branches of primitive skeleton taxonomy" +
                                " should form trees: %s has %s primitive parents: %s"
                                % (c.qname, len(primAnc), primAnc),
                                UserWarning, 1)
                        children = [desc for desc in c.subSumpteeIds()]
                        for child in children:
                            for otherChild in [
                                    o for o in children if o is not child
                            ]:
                                # Distinct loop name so the outer class ``c``
                                # is never shadowed.
                                if otherChild not in [
                                        dc.identifier
                                        for dc in Class(child).disjointWith
                                ]:  # and \
                                    warnings.warn(
                                        "Primitive children (of %s) " % (c.qname) + \
                                        "must be mutually disjoint: %s and %s" % (
                                    Class(child).qname, Class(otherChild).qname), UserWarning, 1)
                # if not isinstance(c.identifier, BNode):
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds = []
    if options.why:
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                 format='n3')
        factGraph.templateMap = {
            pred: template
            for pred, _ignore, template in builtinTemplateGraph.triples(
                (None, TEMPLATES.filterTemplate, None))
        }
        goals = []
        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prologue:
            query.prologue = Prologue(None, [])
            query.prologue.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in list(nsBinds.items()):
                if prefix not in query.prologue.prefixBindings:
                    query.prologue.prefixBindings[prefix] = nsInst
        print("query.prologue", query.prologue)
        print("query.query", query.query)
        print("query.query.whereClause", query.query.whereClause)
        print("query.query.whereClause.parsedGraphPattern",
              query.query.whereClause.parsedGraphPattern)
        goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern(
            query.query.whereClause.parsedGraphPattern,
            query.prologue).patterns])
        # dPreds=[]# p for s, p, o in goals ]
        # print("goals", goals)
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        # topDownDerivedPreds  = []
        defaultBasePreds = []
        defaultDerivedPreds = set()
        hybridPredicates = []
        mapping = dict(newNsMgr.namespaces())
        for edb in options.edb:
            pref, uri = edb.split(':')
            defaultBasePreds.append(URIRef(mapping[pref] + uri))
        noMagic = []
        for pred in options.noMagic:
            pref, uri = pred.split(':')
            noMagic.append(URIRef(mapping[pref] + uri))
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds = IdentifyDerivedPredicates(
                ddlGraph, Graph(), ruleSet)
        else:
            for idb in options.idb:
                pref, uri = idb.split(':')
                defaultDerivedPreds.add(URIRef(mapping[pref] + uri))
            defaultDerivedPreds.update(
                set([p == RDF.type and o or p for s, p, o in goals]))
            for hybrid in options.hybridPredicate:
                pref, uri = hybrid.split(':')
                hybridPredicates.append(URIRef(mapping[pref] + uri))

        if options.method == 'gms':
            for goal in goals:
                goalSeed = AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation)",
                      goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t", factGraph.qname(p))
            for rule in MagicSetTransformation(
                    factGraph,
                    ruleSet,
                    goals,
                    derivedPreds=bottomUpDerivedPreds,
                    strictCheck=nameMap[options.strictness],
                    defaultPredicates=(defaultBasePreds, defaultDerivedPreds),
                    noMagic=noMagic):
                magicRuleNo += 1
                network.buildNetworkFromClause(rule)
            if len(list(ruleSet)):
                print("reduction in size of program: %s (%s -> %s clauses)" %
                      (100 -
                       (float(magicRuleNo) / float(len(list(ruleSet)))) * 100,
                       len(list(ruleSet)), magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not [
                    rule for rule in factGraph.adornedProgram if len(rule.sip)
            ]:
                warnings.warn(
                    "Using GMS sideways information strategy with no " +
                    "information to pass from query.  Falling back to " +
                    "naive method over given facts and rules")
                network.feedFactsToAdd(workingMemory)
            sTime = time.time() - start
            if sTime > 1:
                sTimeStr = "%s seconds" % sTime
            else:
                sTime = sTime * 1000
                sTimeStr = "%s milli seconds" % sTime
            print("Time to calculate closure on working memory: ", sTimeStr)

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            if options.builtinTemplates:
                builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                     format='n3')
                builtinDict = {
                    pred: template
                    for pred, _ignore, template in
                    builtinTemplateGraph.triples((None,
                                                  TEMPLATES.filterTemplate,
                                                  None))
                }
            else:
                builtinDict = None
            topDownStore = TopDownSPARQLEntailingStore(
                factGraph.store,
                factGraph,
                idb=ruleSet,
                DEBUG=options.debug,
                derivedPredicates=topDownDPreds,
                templateMap=builtinDict,
                nsBindings=network.nsMap,
                identifyHybridPredicates=options.hybrid
                if options.method == 'bfp' else False,
                hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal) for goal in goals],
            #                         targetGraph)
            # query = queryLiteral.asSPARQL()
            # print("Goal to solve ", query)
            sTime = time.time() - start
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                sTime = time.time() - start
                if sTime > 1:
                    sTimeStr = "%s seconds" % sTime
                else:
                    sTime = sTime * 1000
                    sTimeStr = "%s milli seconds" % sTime
                print("Time to reach answer ground goal answer of %s: %s" %
                      (result.askAnswer[0], sTimeStr))
            else:
                for rt in result:
                    sTime = time.time() - start
                    if sTime > 1:
                        sTimeStr = "%s seconds" % sTime
                    else:
                        sTime = sTime * 1000
                        sTimeStr = "%s milli seconds" % sTime
                    # NOTE(review): with --firstAnswer the loop exits before
                    # anything is printed for the first result.
                    if options.firstAnswer:
                        break
                    print(
                        "Time to reach answer %s via top-down SPARQL sip strategy: %s"
                        % (rt, sTimeStr))
            if options.output == 'conflict' and options.method == 'bfp':
                for _network, _goal in topDownStore.queryNetworks:
                    # Report the per-query network being iterated, not the
                    # outer ``network``.
                    print(_network, _goal)
                    _network.reportConflictSet(options.debug)
                for query in topDownStore.edbQueries:
                    print(query.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds" % sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milli seconds" % sTime
        print("Time to calculate closure on working memory: ", sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    # NOTE(review): --method only accepts 'gms', 'bfp' or 'naive', so this
    # membership test can never succeed as written.
    if options.negation and network.negRules and options.method in [
            'both', 'bottomUp'
    ]:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print(
            "Time to calculate stratified, stable model (inferred %s facts): %s"
            % (rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(
            cGraph.serialize(destination=None,
                             format=options.output,
                             base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(
            network.inferredFacts.serialize(destination=None,
                                            format=options.output,
                                            base=None))
Exemple #8
0
def _load_user_builtins(path):
    """Execute the Python source file at *path* and return it as a module.

    Replacement for ``imp.load_source`` (the ``imp`` module was removed in
    Python 3.12).  The module is deliberately given a private name and is
    not registered in ``sys.modules``, so the user's file can never be
    executed into, or shadow, the interpreter's real ``builtins`` module.

    :param path: filesystem path of the Python source file to execute
    :return: the freshly executed module object
    """
    import importlib.machinery
    import importlib.util

    modName = '_fuxi_user_builtins'
    # An explicit SourceFileLoader lets files without a ``.py`` suffix load
    loader = importlib.machinery.SourceFileLoader(modName, path)
    spec = importlib.util.spec_from_file_location(modName, path, loader=loader)
    module = importlib.util.module_from_spec(spec)
    loader.exec_module(module)
    return module


def _format_elapsed(seconds):
    """Render a duration as seconds, or as milliseconds when it is <= 1s.

    :param seconds: elapsed wall-clock time in seconds
    :return: a string such as ``"2.5 seconds"`` or ``"250.0 milli seconds"``
    """
    if seconds > 1:
        return "%s seconds" % seconds
    return "%s milli seconds" % (seconds * 1000)


def _expand_qname(mapping, qname):
    """Expand a ``prefix:local`` QName into a URIRef.

    Only the first ':' separates the prefix, so local names may themselves
    contain colons.

    :param mapping: dict of namespace prefix -> namespace URI
    :param qname: string of the form ``prefix:local``
    :return: ``URIRef(mapping[prefix] + local)``
    :raises ValueError: if *qname* contains no ':'
    :raises KeyError: if the prefix is not in *mapping*
    """
    prefix, localName = qname.split(':', 1)
    return URIRef(mapping[prefix] + localName)


def main():
    """Command-line entry point.

    Parses the options from ``sys.argv``, loads the fact graph (local files,
    STDIN or a SPARQL endpoint) and the rulesets (N3 / RIF documents and,
    with ``--dlp``, rules extracted from OWL), evaluates them with the
    selected ``--method`` and prints timings plus the serialization chosen
    with ``--output`` to STDOUT.

    Invalid option combinations are reported through ``OptionParser.error``,
    which prints the usage message and exits with status 2.
    """
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')
    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-niave methods' +
             'see --method')
    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
             ' along with the original triples.  Otherwise ' +
             '(the default behavior), serialize only the inferred triples')
    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')
    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=['xml',
                 'TriX',
                 'n3',
                 'pml',
                 'proof-graph',
                 'nt',
                 'rif',
                 'rif-xml',
                 'conflict',
                 'man-owl'],
        help="Serialize the inferred triples and/or original RDF triples to STDOUT " +
             "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', " +
             "or 'n3') or to print a summary of the conflict set (from the RETE " +
             "network) if the value of this option is 'conflict'.  If the the " +
             " value is 'rif' or 'rif-xml', Then the rules used for inference " +
             "will be serialized as RIF.  If the value is 'pml' and --why is used, " +
             " then the PML RDF statements are serialized.  If output is " +
             "'proof-graph then a graphviz .dot file of the proof graph is printed. " +
             "Finally if the value is 'man-owl', then the RDF facts are assumed " +
             "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default")
    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
             'classes within the entire OWL/RDF are targetted for serialization' +
             '.  Can be used more than once')
    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with with --method=bfp to determine whether or not to ' +
             'peek into the fact graph to identify predicates that are both ' +
             'derived and base.  This is expensive for large fact graphs' +
             'and is explicitely not used against SPARQL endpoints')
    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
             'properties are serialized / extracted.  Can be used more than once')
    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]" +
             "The default is %default")
    op.add_option(
        '--ddlGraph',
        default=False,
        help="The location of a N3 Data Description document describing the IDB predicates")
    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the RDF document(s) which serve as the initial facts " +
             " for the RETE network. One of 'xml','n3','trix', 'nt', " +
             "or 'rdfa'.  The default is %default")
    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety.  A value of 'loose' " +
             " means that unsafe rules will be ignored.  A value of 'strict' " +
             " will cause a syntax exception upon any unsafe rule.  A value of " +
             "'none' (the default) does nothing")
    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help='Used with --dlp to add pD semantics ruleset for semantics not covered ' +
             'by DLP but can be expressed in definite Datalog Logic Programming' +
             ' The default is %default')
    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')
    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This ' +
             'can be used more than once')
    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network' +
             '.  Can be specified more than once')
    op.add_option(
        '-d', '--debug',
        action='store_true',
        default=False,
        help='Include debugging output')
    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose',
                 'defaultBase',
                 'defaultDerived',
                 'harsh'],
        help='Used with --why to specify whether to: *not* check if predicates are ' +
             ' both derived and base (loose), if they are, mark as derived (defaultDerived) ' +
             'or as base (defaultBase) predicates, else raise an exception (harsh)')
    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for query.  ' +
             'One of: gms,bfp,naive')
    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help='Used with --why to determine whether to fetch all answers or just ' +
             'the first')
    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultDerived to specify which clashing ' +
             'predicate will be designated as a base predicate')
    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help='Used with --why/--strictness=defaultBase to specify which clashing ' +
             'predicate will be designated as a derived predicate')
    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help='Used with --why to explicitely specify a hybrid predicate (in both ' +
             ' IDB and EDB) ')
    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate shouldnt have its ' +
             'magic sets calculated')
    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as a filter (entailments do not particpate in network)')
    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from " +
             "the rule graph.  The default is %default")
    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a " +
             "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations")
    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default')
    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help='Indicates that the sole argument is the URI of a SPARQL endpoint to query')
    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help='The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
             '(other wise, fact graph(s) are used)  ')
    op.add_option(
        '--ruleFormat',
        default='n3',
        dest='ruleFormat',
        metavar='RULE_FORMAT',
        choices=['n3', 'rif'],
        help="The format of the rules to parse ('n3', 'rif').  The default is %default")
    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help="The format of the OWL RDF/XML graph specified via --ontology.  The default is %default")
    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help='The path to an N3 document associating SPARQL FILTER templates to ' +
             'rule builtins')
    op.add_option(
        '--negation',
        action='store_true',
        default=False,
        help='Extract negative rules?')
    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        # Split on the first '=' only: the URI part may itself contain '='
        pref, nsUri = nsBind.split('=', 1)
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQL', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    # The user-supplied builtins module is executed once and shared by the
    # rule parser and the network set-up further down.
    userFuncs = None
    if options.builtins:
        userFuncs = _load_user_builtins(options.builtins)

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from %s" % fileN)
        if userFuncs is not None:
            # NOTE: when --builtins is given the rules are always read as N3,
            # whatever --ruleFormat says.
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
        elif options.ruleFormat == 'rif':
            try:
                from FuXi.Horn.RIFCore import RIFCoreParser
                rif_parser = RIFCoreParser(location=fileN, debug=options.debug)
                rs = rif_parser.getRuleset()
            except ImportError as err:
                raise Exception(
                    "Missing 3rd party libraries for RIF processing"
                ) from err
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)

    ruleSet.nsMapping = nsBinds

    for prefix, uri in nsBinds.items():
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                # NOTE(review): the graph is parsed into while its
                # owl:imports objects are being iterated - confirm the store
                # in use tolerates mutation during iteration.
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. %s" % owlImport)

    if not options.sparqlEndpoint and facts:
        for pref, uri in factGraph.namespaces():
            nsBinds[pref] = uri

    if options.stdin:
        # Explicit check instead of ``assert``, which is stripped under -O
        if options.sparqlEndpoint:
            op.error("Cannot use --stdin with --sparqlEndpoint")
        factGraph.parse(sys.stdin, format=options.inputFormat)

    # Normalize namespace mappings
    # prune redundant, rdflib-allocated namespace prefix mappings
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in list(CollapseDictionary(dict(factGraph.namespaces())).items()):
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    # NormalFormReduction is needed by both --normalForm and --dlp.  It must
    # be imported before its first use: a function-local import that only ran
    # in the later --dlp branch made the --normalForm call below fail with
    # UnboundLocalError.
    if options.normalForm or options.dlp:
        from FuXi.DLP.DLNormalization import NormalFormReduction

    if options.normalForm:
        NormalFormReduction(factGraph)

    # No working memory is generated for a SPARQL endpoint
    workingMemory = None
    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)

    def requireWorkingMemory():
        """Return the working memory, or abort with a usage error if there is none."""
        if workingMemory is None:
            op.error("Cannot evaluate the fact graph bottom-up with "
                     "--sparqlEndpoint: no working memory is built for "
                     "remote endpoints")
        return workingMemory

    if userFuncs is not None:
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True,
            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)

    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                print(Class(_expand_qname(mapping, c)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                print(Property(_expand_qname(mapping, p)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [sc for sc in c.subClassOf if sc.isPrimitive()]
                        if len(primAnc) > 1:
                            warnings.warn(
                                "Branches of primitive skeleton taxonomy" +
                                " should form trees: %s has %s primitive parents: %s" % (
                                    c.qname, len(primAnc), primAnc),
                                UserWarning, 1)
                        children = list(c.subSumpteeIds())
                        for child in children:
                            for otherChild in [o for o in children if o is not child]:
                                if otherChild not in [
                                        d.identifier
                                        for d in Class(child).disjointWith]:
                                    warnings.warn(
                                        "Primitive children (of %s) " % (c.qname) +
                                        "must be mutually disjoint: %s and %s" % (
                                            Class(child).qname,
                                            Class(otherChild).qname),
                                        UserWarning, 1)
                # if not isinstance(c.identifier,BNode):
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds = []
    if options.why:
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                 format='n3')
        templateMap = {
            pred: template
            for pred, _ignore, template in builtinTemplateGraph.triples(
                (None, TEMPLATES.filterTemplate, None))}
        factGraph.templateMap = templateMap
        goals = []
        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prolog:
            query.prolog = Prolog(None, [])
            query.prolog.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in list(nsBinds.items()):
                if prefix not in query.prolog.prefixBindings:
                    query.prolog.prefixBindings[prefix] = nsInst
        goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern(
            query.query.whereClause.parsedGraphPattern,
            query.prolog).patterns])
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        defaultDerivedPreds = set()
        hybridPredicates = []
        mapping = dict(newNsMgr.namespaces())
        defaultBasePreds = [_expand_qname(mapping, edb)
                            for edb in options.edb]
        noMagic = [_expand_qname(mapping, pred)
                   for pred in options.noMagic]
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds = IdentifyDerivedPredicates(
                ddlGraph,
                Graph(),
                ruleSet)
        else:
            for idb in options.idb:
                defaultDerivedPreds.add(_expand_qname(mapping, idb))
            # For rdf:type goals the class (object) is the derived term,
            # otherwise the predicate is.
            defaultDerivedPreds.update(
                o if p == RDF.type else p for s, p, o in goals)
            for hybrid in options.hybridPredicate:
                hybridPredicates.append(_expand_qname(mapping, hybrid))

        if options.method == 'gms':
            for goal in goals:
                goalSeed = AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation) %s" % goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t%s" % factGraph.qname(p))
            for rule in MagicSetTransformation(
                    factGraph,
                    ruleSet,
                    goals,
                    derivedPreds=bottomUpDerivedPreds,
                    strictCheck=nameMap[options.strictness],
                    defaultPredicates=(defaultBasePreds,
                                       defaultDerivedPreds),
                    noMagic=noMagic):
                magicRuleNo += 1
                network.buildNetworkFromClause(rule)
            ruleCount = len(list(ruleSet))
            if ruleCount:
                print("reduction in size of program: %s (%s -> %s clauses)" % (
                    100 - (float(magicRuleNo) / float(ruleCount)) * 100,
                    ruleCount,
                    magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not any(len(rule.sip) for rule in factGraph.adornedProgram):
                warnings.warn(
                    "Using GMS sideways information strategy with no " +
                    "information to pass from query.  Falling back to " +
                    "naive method over given facts and rules")
                network.feedFactsToAdd(requireWorkingMemory())
            print("Time to calculate closure on working memory: %s" %
                  _format_elapsed(time.time() - start))

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            # Same predicate -> template mapping as factGraph.templateMap;
            # reuse it instead of fetching and parsing the document again.
            if options.builtinTemplates:
                builtinDict = dict(templateMap)
            else:
                builtinDict = None
            topDownStore = TopDownSPARQLEntailingStore(
                factGraph.store,
                factGraph,
                idb=ruleSet,
                DEBUG=options.debug,
                derivedPredicates=topDownDPreds,
                templateMap=builtinDict,
                nsBindings=network.nsMap,
                identifyHybridPredicates=options.hybrid,
                hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            # Iterate over a snapshot: the store is handed the same mapping
            # object as nsBindings above.
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                print("Time to reach answer ground goal answer of %s: %s" % (
                    result.askAnswer[0],
                    _format_elapsed(time.time() - start)))
            else:
                for rt in result:
                    sTimeStr = _format_elapsed(time.time() - start)
                    print(
                        "Time to reach answer %s via top-down SPARQL sip strategy: %s" % (
                            rt, sTimeStr))
                    # Report the first answer before stopping; breaking
                    # first would leave --firstAnswer with no output at all.
                    if options.firstAnswer:
                        break
            if options.output == 'conflict':
                for _network, _goal in topDownStore.queryNetworks:
                    print(_network, _goal)
                    _network.reportConflictSet(options.debug)
                for edbQuery in topDownStore.edbQueries:
                    print(edbQuery.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(requireWorkingMemory())
        print("Time to calculate closure on working memory: %s" %
              _format_elapsed(time.time() - start))
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    # NOTE(review): --method only accepts 'gms', 'bfp' or 'naive', so this
    # guard can never be true as written; left unchanged pending confirmation
    # of which methods should trigger the stratified model.
    if options.negation and network.negRules and options.method in ['both',
                                                                    'bottomUp']:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print("Time to calculate stratified, stable model (inferred %s facts): %s" % (
            rt,
            time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(cGraph.serialize(destination=None,
                               format=options.output,
                               base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(network.inferredFacts.serialize(destination=None,
                                              format=options.output,
                                              base=None))
class BrickEndpoint():
    def __init__(self,
                 sparqlServer,
                 brickVersion,
                 defaultGraph,
                 loadSchema=False):
        """
        Record connection settings and derive the Brick namespaces, the
        SPARQL prefix header and the schema file URLs for one Brick version.

        :param sparqlServer: URL of the SPARQL endpoint used for queries
        :param brickVersion: version string interpolated into every
            brickschema.org URL built here
        :param defaultGraph: graph used when a call does not name one
        :param loadSchema: accepted but not acted on by this constructor;
            call :meth:`loadSchema` explicitly to load the schema files
        """
        self.log = logging.getLogger()

        self.brickVersion = brickVersion
        # The attribute used to exist only under this misspelled name; keep
        # it as an alias so existing readers do not break.
        self.brickVerion = brickVersion
        self.sparqlServer = sparqlServer
        self.defaultGraph = defaultGraph

        schemaRoot = f"https://brickschema.org/schema/{brickVersion}"
        self.BrickNS = Namespace(f"{schemaRoot}/Brick#")
        self.BrickFrameNS = Namespace(f"{schemaRoot}/BrickFrame#")
        self.BrickTagNS = Namespace(f"{schemaRoot}/BrickTag#")
        self.BrickUseNS = Namespace(f"{schemaRoot}/BrickUse#")

        # TODO: Out of the namespaces above, only brickNS is associated with prefix
        # 'brick', for lack of consistent prefixes for the other three.
        self.nsMgr = NamespaceManager(Graph())
        self.nsMgr.bind('brick', self.BrickNS)
        self.nsMgr.bind('rdf', RDF)
        self.nsMgr.bind('rdfs', RDFS)
        self.nsMgr.bind('owl', OWL)
        self.nsMgr.bind('skos', SKOS)
        self.nsMgr.bind('foaf', FOAF)
        # One "PREFIX p: <uri>" line per bound namespace; execQuery and
        # execUpdate prepend this header to every query.
        self.queryPrefixes = '\n'.join(
            f"PREFIX {prefix}: <{str(path)}>"
            for (prefix, path) in self.nsMgr.namespaces())

        self.Brick = f"{schemaRoot}/Brick.ttl"
        self.BrickFrame = f"{schemaRoot}/BrickFrame.ttl"
        self.BrickTag = f"{schemaRoot}/BrickTag.ttl"
        self.BrickUse = f"{schemaRoot}/BrickUse.ttl"

    def _getSparql(self, graphName=None, update=False):
        """
        Build a SPARQLWrapper configured for a single request.

        The update endpoint is the query endpoint URL with ``-auth``
        appended. Credentials are user ``dba`` with the password taken from
        the ``DBA_PASSWORD`` environment variable, sent with digest auth;
        results are requested as JSON.

        :param graphName: graph to address; falls back to the default graph
            when empty or ``None``
        :param update: when true, the request method is switched to POST
        :return: the configured wrapper
        """
        wrapper = SPARQLWrapper(endpoint=self.sparqlServer,
                                updateEndpoint=self.sparqlServer + '-auth',
                                defaultGraph=graphName or self.defaultGraph)
        try:
            password = os.environ['DBA_PASSWORD']
            wrapper.setCredentials('dba', password)
            wrapper.setHTTPAuth(DIGEST)
            wrapper.setReturnFormat(JSON)
            if update:
                wrapper.setMethod(POST)
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e
        return wrapper

    def listGraphs(self):
        dbGraphs = []
        try:
            sparql = self._getSparql()
            sparql.setQuery('SELECT DISTINCT ?g WHERE { GRAPH ?g {?s a ?t} }')
            results = sparql.query().convert()['results']['bindings']

            self.log.debug(f"# of graphs: {len(results)}")
            for r in results:
                graphName = r['g']['value']
                self.log.debug(
                    f"{graphName} {self.queryGraphCount(graphName)}")
                dbGraphs.append(graphName)
            return dbGraphs
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e

    def dropGraph(self, graphName, force=False):
        if force:
            q = f"DROP SILENT GRAPH <{graphName}>"
        else:
            q = f"DROP GRAPH <{graphName}>"
        self.log.info(q)

        try:
            sparql = self._getSparql(graphName=graphName, update=True)
            sparql.setQuery(q)
            results = sparql.query()
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e

    def createGraph(self, graphName):
        try:
            sparql = self._getSparql(graphName=graphName, update=True)
            q = f"CREATE GRAPH <{graphName}>"
            self.log.info(q)
            sparql.setQuery(q)
            results = sparql.query()
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e

    def loadSchema(self):
        try:
            # delete all schema graphs before loading
            self.dropGraph(self.Brick, force=True)
            self.loadFileViaURL(self.Brick)
            self.dropGraph(self.BrickFrame, force=True)
            self.loadFileViaURL(self.BrickFrame)
            self.dropGraph(self.BrickUse, force=True)
            self.loadFileViaURL(self.BrickUse)
            self.dropGraph(self.BrickTag, force=True)
            self.loadFileViaURL(self.BrickTag)
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e

    def loadFileViaURL(self, graphFile, graphName=None):
        graph = graphName if graphName else graphFile
        try:
            sparql = self._getSparql(graphName=graph, update=True)
            q = f"LOAD <{graphFile}> INTO <{graph}>"
            self.log.info(q)
            sparql.setQuery(q)
            results = sparql.query()
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e

    def queryGraphCount(self, graphName):
        nTriples = None

        try:
            sparql = self._getSparql(graphName=graphName)

            # cheap op: get count
            q = 'SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o . }'
            sparql.setQuery(q)
            ret = sparql.query().convert()
            for r in ret['results']['bindings']:
                nTriples = r['count']['value']
                break
            return nTriples
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e

    def queryGraph(self, graphName, verbose=False):
        """
        Fetch every triple of a graph and rebuild it as a local rdflib Graph.

        :param graphName: graph to download
        :param verbose: when true, each returned binding is logged at debug
            level
        :return: a new Graph holding the converted triples
        :raises AssertionError: if a binding has a term type other than
            ``uri``, ``literal``, ``typed-literal`` or ``bnode``
        :raises Exception: any error is logged and re-raised
        """

        def toTerm(term):
            # Convert one SPARQL JSON binding into the matching rdflib term.
            kind = term['type']
            if kind == 'uri':
                return URIRef(term['value'])
            if kind in ('literal', 'typed-literal'):
                # 'xml:lang' and 'datatype' are optional keys: plain
                # literals carry neither, so they must not be indexed
                # directly.
                lang = term.get('xml:lang')
                if lang:
                    return Literal(term['value'], lang)
                datatype = term.get('datatype')
                if datatype:
                    return Literal(term['value'], datatype=URIRef(datatype))
                return Literal(term['value'])
            if kind == 'bnode':
                return BNode(term['value'])
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(f"term type {kind} is not handled")

        try:
            sparql = self._getSparql(graphName=graphName)
            sparql.setQuery('SELECT * WHERE { ?s ?p ?o. }')
            ret = sparql.query().convert()
            triples = ret['results']['bindings']
            self.log.debug(f"queryGraph # of triples: {len(triples)}")

            g = Graph()
            for r in triples:
                if verbose:
                    self.log.debug(f"({r['s']['type']})<{r['s']['value']}> " \
                                   f"({r['p']['type']})<{r['p']['value']}> " \
                                   f"({r['o']['type']})<{r['o']['value']}>")

                g.add(tuple(toTerm(r[key]) for key in ('s', 'p', 'o')))

            return g
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise

    # end of queryGraph()

    def loadGraph(self, g, graphName):
        try:
            sparql = self._getSparql(graphName, update=True)
            q = f"WITH <{graphName}> INSERT {{\n"
            for (s, p, o) in g:
                q += ' '.join([term.n3() for term in (s, p, o)]) + ' .\n'
            q += '}'
            sparql.setQuery(q)
            results = sparql.query()
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e

    def execQuery(self, queryStr, graphName=None):
        """
        add known prefixes to the queryStr and perform query
        """

        q = self.queryPrefixes + '\n' + queryStr
        self.log.debug(f"execQuery query:\n{q}")

        try:
            sparql = self._getSparql(graphName=graphName)
            sparql.setQuery(q)
            return sparql.query().convert()
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e

    def execUpdate(self, queryStr, graphName=None):
        """
        add known prefixes to the queryStr and perform update
        """

        q = self.queryPrefixes + '\n' + queryStr
        self.log.debug(f"execUpdate query:\n{q}")

        try:
            sparql = self._getSparql(graphName=graphName, update=True)
            sparql.setQuery(q)
            return sparql.query().convert()
        except Exception as e:
            self.log.error(f"exception: {e}")
            raise e

    def addNamespace(self, prefix, path):
        """
        Allow user to add their own namespaces for query convenience
        """

        self.nsMgr.bind(prefix, path)
        self.queryPrefixes = \
        '\n'.join([f"PREFIX {prefix}: <{str(path)}>" for (prefix, path) in self.nsMgr.namespaces()])
        self.log.debug(f"query prefixes:\n{self.queryPrefixes}")
Exemple #10
0
class RDFBuilder(object):
    """
    Class for building rdf graphs from document
    author: akorovin
    """
    def __init__(self, host_url):
        """
        Create an empty graph with the builder's prefixes bound
        :param host_url: URL prefix used when building document URIs
        :type host_url: string
        """
        self._host_url = host_url
        self._graph = Graph()
        self._ns_manager = NamespaceManager(self._graph)
        self._initialize_prefixes()
        # prefix -> Namespace lookup for every namespace the manager reports
        self._namespaces = {
            prefix: Namespace(uri)
            for prefix, uri in self._ns_manager.namespaces()
        }

    def _initialize_prefixes(self):
        """
        Bind the 'oa', 'review' and 'foaf' prefixes on the namespace manager
        :return: None
        :rtype: None
        """
        bindings = (
            ('oa', Namespace('http://www.w3.org/ns/oa#')),
            ('review',
             Namespace('http://eis.iai.uni-bonn.de/Projects/OSCOSS/reviews/')),
            ('foaf', FOAF),
        )
        for prefix, namespace in bindings:
            self._ns_manager.bind(prefix, namespace)

    def _add_comments_to_rdf_graph(self, comments_content, document_node,
                                   document):
        """
        Private method for adding comments to graph

        Each comment becomes an oa:Annotation node at
        ``<document>/comments/<index>`` carrying its date, body and author,
        plus a target description added by _add_target_for_comment.
        :param comments_content: Filtered comments, keyed by comment index
        :type comments_content: dict
        :param document_node: URI node of the document in the graph
        :type document_node: URIRef
        :param document: Document object, passed through to
            _add_target_for_comment
        :return: None
        :rtype: None
        """
        is_major_predicate = self._namespaces['review'].isMajor
        oa_ns = self._namespaces['oa']
        comments_root = document_node.toPython() + '/comments/'

        # items() works on Python 2 and 3 alike; iteritems() no longer exists
        # on Python 3 dicts.
        for comment_index, cur_comment_json in comments_content.items():
            cur_comment_node = URIRef(comments_root + comment_index)
            self._graph.add((cur_comment_node, RDF.type, oa_ns.Annotation))
            # 'date' is divided by 1000 before conversion, i.e. it is treated
            # as a millisecond timestamp
            self._graph.add(
                (cur_comment_node, oa_ns.annotateAt,
                 Literal(
                     datetime.fromtimestamp(cur_comment_json['date'] / 1000))))
            self._graph.add((cur_comment_node, oa_ns.hasBody,
                             Literal(cur_comment_json['comment'])))
            self._graph.add((cur_comment_node, oa_ns.annotatedBy,
                             Literal(cur_comment_json['userName'])))

            self._add_target_for_comment(cur_comment_node, document_node,
                                         oa_ns, document)
            if 'review:isMajor' in cur_comment_json:
                self._graph.add((cur_comment_node, is_major_predicate,
                                 Literal(cur_comment_json['review:isMajor'])))

    def _add_target_for_comment(self, cur_comment_node, document_node, oa_ns,
                                document):
        """
        Attach a target description to a comment node

        The target is a blank oa:SpecificResource whose source is the
        document and whose selector is a blank oa:TextPositionSelector.
        :param cur_comment_node: Comment node that receives the target
        :param document_node: Document node used as the target's source
        :param oa_ns: Namespace supplying the oa terms
        :param document: Not used by this method
        :return: None
        :rtype: None
        """
        target = BNode()
        selector = BNode()

        statements = (
            # TODO: this is lazy solution. need to add range
            (cur_comment_node, oa_ns.hasTarget, target),
            (target, RDF.type, oa_ns.SpecificResource),
            (target, oa_ns.hasSource, document_node),
            (target, oa_ns.hasSelector, selector),
            (selector, RDF.type, oa_ns.TextPositionSelector),
            # TODO: add real start and end. Probably not possible to implement
            # Text Position Selector.
            (selector, oa_ns.end, Literal(7)),
            (selector, oa_ns.start, Literal(4)),
        )
        for statement in statements:
            self._graph.add(statement)

    def get_comments_by_document(self,
                                 document,
                                 comments_content,
                                 format='turtle'):
        """
        Serialize a document and its comments as RDF

        The document node is typed foaf:Document, the comments are added,
        the graph is serialized, and afterwards the triples whose subject is
        the document node are removed again.
        :param document: Document object; its ``id`` is used in the URI
        :param comments_content: Filtered comments, keyed by comment index
        :type comments_content: dict
        :param format: Serialization format. Default is Turtle
        :type format: string
        :return: Serialized graph
        """
        document_uri = self._host_url + "document/" + str(document.id)
        document_node = URIRef(document_uri)

        self._graph.add((document_node, RDF.type, FOAF.Document))
        self._add_comments_to_rdf_graph(comments_content, document_node,
                                        document)

        # TODO: support json -ld
        serialized = self._graph.serialize(format=format)
        self._remove_root_graph(document_node)
        return serialized

    def _remove_root_graph(self, document_node):
        """
        Removes document node from the graph
        :param document_node: Document triple root
        :type document_node: RDF triple
        :return: None
        :rtype: None
        """
        self._graph.remove((document_node, None, None))
Exemple #11
0
 def get_graph_prefixes(self):
     """Store a prefix -> namespace dict for ``self.graph`` in ``self.prefixes``.

     Every pair reported by the namespace manager is kept; nothing is
     filtered out.
     """
     manager = NamespaceManager(self.graph)
     self.prefixes = {
         prefix: namespace for prefix, namespace in manager.namespaces()
     }
Exemple #12
0
class OdgiStore(Store):
    """\
    An in memory implementation of an ODGI read only store.
    
    It used the disk based odgi/handlegraph as backing store.
    
    Authors: Jerven Bolleman
    """
    odgi_graph: odgi

    knownPaths: List[PathIriRef] = []
    namespace_manager: NamespaceManager
    base: str

    def __init__(self, configuration=None, identifier=None, base=None):
        """
        Set up namespaces and per-instance state; no graph is loaded yet.

        :param configuration: handed to the base Store initialiser and kept
            on the instance
        :param identifier: kept on the instance
        :param base: IRI prefix under which the ``path/`` and ``step/``
            namespaces are built; defaults to ``http://example.org/vg/``
        """
        # The class-level ``knownPaths`` default is one list object shared by
        # every instance; give each store its own so paths collected by
        # open() do not leak between stores. Assigned first so it exists
        # whatever the base initialiser does.
        self.knownPaths = []
        super(OdgiStore, self).__init__(configuration)
        self.namespace_manager = NamespaceManager(Graph())
        self.bind('vg', VG)
        self.bind('faldo', FALDO)
        self.identifier = identifier
        self.configuration = configuration
        self.base = 'http://example.org/vg/' if base is None else base
        self.pathNS = Namespace(f'{self.base}path/')
        self.stepNS = Namespace(f'{self.base}step/')
        self.bind('path', self.pathNS)
        self.bind('step', self.stepNS)
        # populated by open()
        self.odgi_graph = None

    def open(self, odgi_file, create=False):
        """
        Load an odgi graph file and register its paths in ``knownPaths``.

        :param odgi_file: file handed to the odgi graph's ``load``
        :param create: not used by this store
        """
        graph = odgi.graph()
        graph.load(odgi_file)
        self.odgi_graph = graph
        collector = CollectPaths(self.knownPaths, graph, self.base)
        graph.for_each_path_handle(collector)

    def triples(self, triple_pattern, context=None):
        """
        A generator over all the triples matching the pattern.

        The pattern is routed to the nodes / steps / paths generators based
        on the predicate first, then on the subject's type, and finally on
        the shape of the subject IRI.

        :param triple_pattern: ``(subject, predicate, object)``; ``None`` in
            a position is a wildcard
        :param context: not used by this store
        :return: iterator of ``[(s, p, o), None]`` entries
        """
        subject, predicate, obj = triple_pattern
        # we have no bnodes in our data, so a blank node in either position
        # can never match
        if isinstance(subject, BNode) or isinstance(obj, BNode):
            return self.__emptygen()
        if RDF.type == predicate and obj is not None:
            return self.type_triples(subject, predicate, obj)
        elif predicate in nodeRelatedPredicates:
            return self.nodes(subject, predicate, obj)
        elif predicate in stepAssociatedPredicates:
            return self.steps(subject, predicate, obj)
        elif RDFS.label == predicate:
            return self.paths(subject, predicate, obj)
        elif subject is None and predicate is None and obj is None:
            return chain(self.nodes(subject, predicate, obj),
                         self.steps(subject, predicate, obj),
                         self.paths(subject, predicate, obj))
        elif subject is not None:
            if type(subject) == PathIriRef:
                return self.paths(subject, predicate, obj)
            elif type(subject) in (StepBeginIriRef, StepEndIriRef,
                                   StepIriRef):
                return self.steps(subject, predicate, obj)
            elif type(subject) == NodeIriRef:
                return self.nodes(subject, predicate, obj)

            # Plain IRI: classify it by its trailing path segments. Length
            # and integer checks keep unrelated IRIs from raising
            # IndexError / ValueError; they simply match nothing.
            subject_iri_parts = subject.toPython().split('/')
            node_id = None
            if len(subject_iri_parts) >= 2 and 'node' == subject_iri_parts[-2]:
                try:
                    node_id = int(subject_iri_parts[-1])
                except ValueError:
                    node_id = None
            if node_id is not None and self.odgi_graph.has_node(node_id):
                handle = self.odgi_graph.get_handle(node_id)
                # same argument order as every other NodeIriRef call in
                # this class: handle, base, graph
                ns = NodeIriRef(handle, self.base, self.odgi_graph)
                return chain(self.handle_to_triples(predicate, obj, handle),
                             self.handle_to_edge_triples(ns, predicate, obj))
            elif (len(subject_iri_parts) >= 4
                  and 'path' == subject_iri_parts[-4]
                  and 'step' == subject_iri_parts[-2]):
                return self.steps(subject, predicate, obj)
            elif (len(subject_iri_parts) >= 2
                  and 'path' == subject_iri_parts[-2]):
                return self.paths(subject, predicate, obj)
            else:
                return self.__emptygen()
        else:
            return self.__emptygen()

    # For the known types we can shortcut evaluation in many cases
    def type_triples(self, subject, predicate, obj):
        """
        Route an rdf:type pattern with a concrete object to the generator
        responsible for that type; unknown types match nothing.
        """
        if VG.Node == obj:
            return self.nodes(subject, predicate, obj)
        if VG.Path == obj:
            return self.paths(subject, predicate, obj)
        if obj in stepAssociatedTypes:
            return self.steps(subject, predicate, obj)
        return self.__emptygen()

    def __all_types(self):
        """Yield the rdf:type triples of every type in ``knownTypes``."""
        for known_type in knownTypes:
            pattern = (None, RDF.type, known_type)
            yield from self.triples(pattern)

    def __all_predicates(self):
        """Yield the triples of every predicate in ``knownPredicates``."""
        for known_predicate in knownPredicates:
            pattern = (None, known_predicate, None)
            yield from self.triples(pattern)

    @staticmethod
    def __emptygen():
        """Return a generator that finishes without producing anything."""
        yield from ()

    def nodes(self, subject: Identifier, predicate: URIRef, obj: Node):
        """
        Yield the node triples matching the pattern.

        With no subject, every node handle of the graph is visited in turn.
        For a concrete subject the type triple, the triples from
        handle_to_triples and the edge triples are produced.

        :param subject: node IRI, or ``None`` for all nodes
        :param predicate: predicate to match, or ``None`` for any
        :param obj: object to match, or ``None`` for any
        :return: iterator of ``[(s, p, o), None]`` entries
        """
        if subject is None:
            for handle in self.handles():
                ns = NodeIriRef(handle, self.base, self.odgi_graph)
                yield from self.nodes(ns, predicate, obj)
            return

        is_node_iri = self.is_node_iri_in_graph(subject)
        # handle_to_triples already emits the type triple when the predicate
        # is rdf:type, so it is produced here only for a wildcard predicate
        # (which handle_to_triples answers with rdf:value alone). The object
        # must still be compatible, otherwise a pattern such as
        # (node, None, other_node) would get a triple that does not match.
        if (predicate is None and is_node_iri
                and (obj is None or obj == VG.Node)):
            yield [(subject, RDF.type, VG.Node), None]

        if type(subject) == NodeIriRef:
            yield from self.handle_to_triples(predicate, obj,
                                              subject.node_handle())
            yield from self.handle_to_edge_triples(subject, predicate, obj)
        elif is_node_iri:
            # plain IRI of the form .../node/<id>: rebuild the typed IRI
            subject_iri_parts = subject.toPython().split('/')
            nh = self.odgi_graph.get_handle(int(subject_iri_parts[-1]))
            ns = NodeIriRef(nh, self.base, self.odgi_graph)
            yield from self.handle_to_triples(predicate, obj, nh)
            yield from self.handle_to_edge_triples(ns, predicate, obj)

    def is_node_iri_in_graph(self, iri: URIRef):
        """
        Tell whether an IRI denotes a node of the loaded graph.

        A NodeIriRef is accepted as is. Any other IRI must end in
        ``node/<integer id>`` and the graph must contain that id.

        :param iri: IRI to test
        :return: truthy when the IRI denotes a known node
        """
        if type(iri) == NodeIriRef:
            return True

        iri_parts = iri.toPython().split('/')
        # IRIs with fewer than two segments, or whose last segment is not an
        # integer, cannot be node IRIs; report that instead of raising.
        if len(iri_parts) < 2 or 'node' != iri_parts[-2]:
            return False
        try:
            node_id = int(iri_parts[-1])
        except ValueError:
            return False
        return self.odgi_graph.has_node(node_id)

    def paths(self, subject: Identifier, predicate: URIRef, obj: Node):
        """
        Yield the ``rdf:type vg:Path`` triple of every known path that
        matches the pattern. No other path triples are produced here.

        :param subject: path IRI, or ``None`` for all known paths
        :param predicate: ``rdf:type`` or ``None``; anything else matches
            nothing
        :param obj: ``vg:Path`` or ``None``; anything else matches nothing
        """
        for known in self.knownPaths:
            if not (subject is None or known == subject):
                continue
            # given rdf:type and vg:Path we can generate the matching triple
            type_wanted = predicate is None or predicate == RDF.type
            path_wanted = obj is None or obj == VG.Path
            if type_wanted and path_wanted:
                yield [(known, RDF.type, VG.Path), None]

    def steps(self, subject: Identifier, predicate: URIRef, obj: Node):
        """
        Yield the step triples matching the pattern.

        * No subject: every step of every non-empty known path is walked
          from the beginning.
        * Typed step subject (step, step begin, step end): the subject's own
          handle, rank and position are used directly.
        * Plain IRI of the form ``.../path/<name>/step/<rank>``: the named
          path is walked until that rank is reached or the path ends.

        While walking, rank and position both start at 1; position advances
        by the length of the node that was just left.

        :return: iterator of ``[(s, p, o), None]`` entries
        """
        graph = self.odgi_graph

        if subject is None:
            for path_ref in self.knownPaths:
                if graph.is_empty(path_ref.path()):
                    continue
                rank = 1
                position = 1
                step_handle = graph.path_begin(path_ref.path())
                node_handle = graph.get_handle_of_step(step_handle)
                yield from self.step_handle_to_triples(
                    step_handle,
                    subject,
                    predicate,
                    obj,
                    node_handle=node_handle,
                    rank=rank,
                    position=position)

                while graph.has_next_step(step_handle):
                    step_handle = graph.get_next_step(step_handle)
                    # node_handle still refers to the previous step's node
                    position += graph.get_length(node_handle)
                    node_handle = graph.get_handle_of_step(step_handle)
                    rank += 1
                    yield from self.step_handle_to_triples(
                        step_handle,
                        subject,
                        predicate,
                        obj,
                        node_handle=node_handle,
                        rank=rank,
                        position=position)
        elif type(subject) in (StepIriRef, StepBeginIriRef, StepEndIriRef):
            yield from self.step_handle_to_triples(subject.step_handle(),
                                                   subject,
                                                   predicate,
                                                   obj,
                                                   rank=subject.rank(),
                                                   position=subject.position())
        else:
            iri_parts = subject.toPython().split('/')
            if not ('path' == iri_parts[-4] and 'step' == iri_parts[-2]):
                return
            path_handle = graph.get_path_handle(iri_parts[-3])
            wanted_rank = int(iri_parts[-1])
            if graph.is_empty(path_handle):
                return

            rank = 1
            position = 1
            step_handle = graph.path_begin(path_handle)
            node_handle = graph.get_handle_of_step(step_handle)
            while rank != wanted_rank and graph.has_next_step(step_handle):
                rank += 1
                position += graph.get_length(node_handle)
                step_handle = graph.get_next_step(step_handle)
                node_handle = graph.get_handle_of_step(step_handle)
            yield from self.step_handle_to_triples(step_handle,
                                                   subject,
                                                   predicate,
                                                   obj,
                                                   node_handle=node_handle,
                                                   rank=rank,
                                                   position=position)

    # else:
    # for nodeHandle in self.handles():
    # for step_handle in self.odgi_graph.steps_of_handle(nodeHandle, False):
    # yield from self.stepHandleToTriples(step_handle, subject, predicate, obj, nodeHandle=nodeHandle)
    def step_handle_to_triples(self,
                               step_handle: odgi.step_handle,
                               subject: Identifier,
                               predicate: URIRef,
                               obj: Node,
                               node_handle: odgi.handle = None,
                               rank=None,
                               position=None):
        """
        Yield the triples of one step that match the pattern.

        :param step_handle: odgi handle of the step
        :param subject: ``None``, a step IRI, or a step begin/end IRI
        :param predicate: predicate to match, or ``None`` for any
        :param obj: object to match, or ``None`` for any
        :param node_handle: handle of the step's node; looked up from the
            step when omitted
        :param rank: rank of the step in its path; rank triples are skipped
            when ``None``
        :param position: position of the step; position triples are skipped
            when ``None``
        :return: iterator of ``[(s, p, o), None]`` entries
        """
        if type(subject) == StepIriRef:
            step_iri = subject
        elif type(subject) in (StepBeginIriRef, StepEndIriRef):
            step_iri = subject.step_iri()
        else:
            step_iri = StepIriRef(step_handle, self.base, self.odgi_graph,
                                  position, rank)

        if subject is None or step_iri == subject:
            if predicate == RDF.type or predicate is None:
                if obj is None or obj == VG.Step:
                    yield [(step_iri, RDF.type, VG.Step), None]
                if obj is None or obj == FALDO.Region:
                    yield [(step_iri, RDF.type, FALDO.Region), None]
            if node_handle is None:
                node_handle = self.odgi_graph.get_handle_of_step(step_handle)
            node_iri = NodeIriRef(node_handle, self.base, self.odgi_graph)
            is_reverse = self.odgi_graph.get_is_reverse(node_handle)
            node_matches = obj is None or node_iri == obj

            # A step has exactly one of vg:node / vg:reverseOfNode, chosen by
            # the handle's orientation. The orientation test must apply to an
            # explicit predicate as well as to the wildcard; written as
            # `a or b and c` it only applied to the wildcard.
            if ((predicate == VG.node or predicate is None)
                    and not is_reverse and node_matches):
                yield [(step_iri, VG.node, node_iri), None]

            if ((predicate == VG.reverseOfNode or predicate is None)
                    and is_reverse and node_matches):
                yield [(step_iri, VG.reverseOfNode, node_iri), None]

            if (predicate == VG.rank
                    or predicate is None) and rank is not None:
                rank = Literal(rank)
                if obj is None or obj == rank:
                    yield [(step_iri, VG.rank, rank), None]

            if (predicate == VG.position
                    or predicate is None) and position is not None:
                position = Literal(position)
                if obj is None or position == obj:
                    yield [(step_iri, VG.position, position), None]

            if predicate == VG.path or predicate is None:
                path = self.odgi_graph.get_path_handle_of_step(step_handle)
                path_iri = self.find_path_iri_by_handle(path)
                if obj is None or path_iri == obj:
                    yield [(step_iri, VG.path, path_iri), None]

            # NOTE(review): the begin/end triples below are not filtered by
            # obj, unlike the ones above - confirm whether that is intended.
            if predicate is None or predicate == FALDO.begin:
                yield [(step_iri, FALDO.begin, StepBeginIriRef(step_iri)),
                       None]

            if predicate is None or predicate == FALDO.end:
                yield [(step_iri, FALDO.end, StepEndIriRef(step_iri)), None]

            if subject is None:
                # a full scan also lists the begin/end position resources
                begin = StepBeginIriRef(step_iri)
                yield from self.faldo_for_step(step_iri, begin, predicate, obj)
                end = StepEndIriRef(step_iri)
                yield from self.faldo_for_step(step_iri, end, predicate, obj)

        if (type(subject) in (StepBeginIriRef, StepEndIriRef)
                and step_iri == subject.step_iri()):
            yield from self.faldo_for_step(subject.step_iri(), subject,
                                           predicate, obj)

    def faldo_for_step(self, step_iri: StepIriRef, subject: Identifier,
                       predicate: URIRef, obj: Node):
        """
        Yield the FALDO triples of a step's begin or end position that match
        the pattern: its faldo:position, its two rdf:type statements
        (ExactPosition and Position) and its faldo:reference to the path.

        :param step_iri: step the position belongs to
        :param subject: the begin/end position IRI; becomes the subject of
            every triple produced
        :param predicate: predicate to match, or ``None`` for any
        :param obj: object to match, or ``None`` for any
        """
        any_predicate = predicate is None
        any_obj = obj is None
        exact = Literal(subject.position())

        if (any_predicate
                or predicate == FALDO.position) and (any_obj or obj == exact):
            yield [(subject, FALDO.position, exact), None]

        for position_type in (FALDO.ExactPosition, FALDO.Position):
            if (any_predicate
                    or predicate == RDF.type) and (any_obj
                                                   or obj == position_type):
                yield [(subject, RDF.type, position_type), None]

        if any_predicate or predicate == FALDO.reference:
            path_iri = self.find_path_iri_by_handle(step_iri.path())
            if any_obj or obj == path_iri:
                yield [(subject, FALDO.reference, path_iri), None]

    def handle_to_triples(self, predicate, obj, node_handle: odgi.handle):
        """
        Yield the rdf:value or rdf:type triple of one node.

        A wildcard predicate is handled entirely by the rdf:value branch, so
        it never produces the rdf:type triple here.

        :param predicate: predicate to match, or ``None`` for any
        :param obj: object to match, or ``None`` for any
        :param node_handle: odgi handle of the node
        """
        node_iri = NodeIriRef(node_handle, self.base, self.odgi_graph)

        if predicate is None or predicate == RDF.value:
            sequence = rdflib.term.Literal(
                self.odgi_graph.get_sequence(node_handle))
            if obj is None or obj == sequence:
                yield [(node_iri, RDF.value, sequence), None]
        elif predicate == RDF.type and (obj is None or obj == VG.Node):
            yield [(node_iri, RDF.type, VG.Node), None]

    def handle_to_edge_triples(self, subject: NodeIriRef, predicate: URIRef,
                               obj: NodeIriRef):
        """
        Yield the link triples from a node to the nodes its edges lead to.

        Nothing is produced unless the predicate is a wildcard or one of
        ``nodeRelatedPredicates``.

        :param subject: node the edges start from
        :param predicate: predicate to match, or ``None`` for any
        :param obj: target node to match, or ``None`` for any
        """
        if predicate is not None and predicate not in nodeRelatedPredicates:
            return

        handle = subject.node_handle()
        targets = []
        # the collector callback fills `targets` with the reached handles
        self.odgi_graph.follow_edges(handle, False, CollectEdges(targets))
        node_iri = NodeIriRef(handle, self.base, self.odgi_graph)
        for target in targets:
            other_iri = NodeIriRef(target, self.base, self.odgi_graph)
            if obj is None or other_iri == obj:
                yield from self.generate_edge_triples(target, handle,
                                                      node_iri, other_iri,
                                                      predicate)

    def generate_edge_triples(self, edge, node_handle: odgi.handle,
                              node_iri: NodeIriRef, other_iri: NodeIriRef,
                              predicate: URIRef):
        """
        Yield the link triples for one edge.

        Exactly one orientation-specific predicate applies to an edge,
        chosen from the orientation of both handles; the generic vg:links
        triple applies to every edge.

        :param edge: handle at the far end of the edge
        :param node_handle: handle the edge starts from
        :param node_iri: IRI used as the subject
        :param other_iri: IRI used as the object
        :param predicate: predicate to match, or ``None`` for any
        """
        node_is_reverse = self.odgi_graph.get_is_reverse(node_handle)
        other_is_reverse = self.odgi_graph.get_is_reverse(edge)

        if node_is_reverse:
            link = (VG.linksReverseToReverse
                    if other_is_reverse else VG.linksReverseToForward)
        else:
            # The forward-to-reverse case used to be compared against
            # linksReverseToReverse, so asking for linksForwardToReverse
            # explicitly never matched.
            link = (VG.linksForwardToReverse
                    if other_is_reverse else VG.linksForwardToForward)

        if predicate is None or link == predicate:
            yield [(node_iri, link, other_iri), None]
        if predicate is None or VG.links == predicate:
            yield [(node_iri, VG.links, other_iri), None]

    def bind(self, prefix, namespace):
        """Bind a prefix to a namespace on the store's namespace manager."""
        manager = self.namespace_manager
        manager.bind(prefix, namespace)

    def namespace(self, search_prefix):
        """Return the namespace bound to a prefix, or ``None`` if unbound."""
        bound = (namespace
                 for prefix, namespace in self.namespace_manager.namespaces()
                 if search_prefix == prefix)
        return next(bound, None)

    def prefix(self, search_namespace):
        """Return the prefix bound to a namespace, or ``None`` if unbound."""
        bound = (prefix
                 for prefix, namespace in self.namespace_manager.namespaces()
                 if search_namespace == namespace)
        return next(bound, None)

    def namespaces(self):
        """Return the (prefix, namespace) pairs of the namespace manager."""
        manager = self.namespace_manager
        return manager.namespaces()

    def handles(self):
        """
        Yield the handle of every node in the graph, in ascending id order.

        Ids between the graph's minimum and maximum node id that are not
        present in the graph are skipped.
        """
        first_id = self.odgi_graph.min_node_id()
        last_id = self.odgi_graph.max_node_id()
        # A range advances past missing ids too; the earlier while-loop only
        # moved on when the id existed and so never ended at the first gap.
        for node_id in range(first_id, last_id + 1):
            if self.odgi_graph.has_node(node_id):
                yield self.odgi_graph.get_handle(node_id)

    def find_path_iri_by_handle(self, path_handle: odgi.path_handle):
        """
        Return the known path IRI for a path handle.

        A known path matches when its handle equals the given one or, failing
        that, when both handles have the same path name in the graph.

        :param path_handle: odgi path handle to look up
        :raises Exception: if no known path matches
        """
        graph = self.odgi_graph
        for known in self.knownPaths:
            known_handle = known.path()
            if known_handle == path_handle:
                return known
            if graph.get_path_name(known_handle) == graph.get_path_name(
                    path_handle):
                return known
        raise Exception("no path handle known " + str(path_handle))
Exemple #13
0
from src.namespaces import DCTERMS, SCHEMA, PROV, MA, MADBDATA, MADBRES, MADBSRC, MADBAPI
from src.rules import TYPE_CONVERSION

# Paths of the JSON-lines files consumed by read_jsonl().
input_files = []

# Namespace manager holding the prefixes understood by this module.  The
# bindings are applied in this exact order.
nsm = NamespaceManager(rdflib.Graph())
for _prefix, _namespace in (
        ('dcterm', DCTERMS),
        ('dcterms', DCTERMS),
        ('schema', SCHEMA),
        ('prov', PROV),
        ('ma', MA),
        ('madbdata', MADBDATA),
):
    nsm.bind(_prefix, _namespace)

# Lookup table: prefix -> Namespace, built from whatever the manager reports.
prefix_to_ns = {p: Namespace(uri) for p, uri in nsm.namespaces()}


def resolve_qname(qname):
    """Expand ``qname`` into a full term using the ``prefix_to_ns`` table.

    ``split_uri`` cuts the input in two; the first part minus its final
    character is used as the prefix key.

    :param qname: the qualified name to expand
    :return: the term from the matching namespace, or ``None`` when the
        prefix is not in ``prefix_to_ns``
    """
    head, local_name = split_uri(qname)
    key = head[:-1]  # drop the trailing separator character
    if key not in prefix_to_ns:
        return None
    return prefix_to_ns[key][local_name]


def read_jsonl():
    """Lazily yield one decoded JSON value per line of each file in ``input_files``.

    Files are opened one at a time, in list order, and closed as soon as
    their last line has been consumed.
    """
    for path in input_files:
        with open(path, 'r') as handle:
            yield from map(json.loads, handle)
Exemple #14
0
def main():
    """Command-line entry point.

    Parses the command-line options, loads the N3 rule documents and the RDF
    fact documents named on the command line (or wraps a SPARQL endpoint),
    builds a RETE network from the rules (optionally extended with rules
    extracted from OWL via DLP), evaluates it and prints the result to STDOUT
    in the format selected with ``--output``.
    """
    from optparse import OptionParser
    op = OptionParser('usage: %prog [options] factFile1 factFile2 ... factFileN')

    op.add_option('--why',
                  default=None,
                  help='Specifies the goals to solve for using the non-naive methods' +
                       'see --method')

    op.add_option('--closure',
                  action='store_true',
                  default=False,
                  help='Whether or not to serialize the inferred triples' +
                        ' along with the original triples.  Otherwise ' +
                        '(the default behavior), serialize only the inferred triples')

    op.add_option('--imports',
                action='store_true',
                default=False,
                help='Whether or not to follow owl:imports in the fact graph')

    op.add_option('--output',
                  default='n3',
                  metavar='RDF_FORMAT',
                  choices=['xml',
                             'TriX',
                             'n3',
                             'pml',
                             'proof-graph',
                             'nt',
                             'rif',
                             'rif-xml',
                             'conflict',
                             'man-owl'],
                  help="Serialize the inferred triples and/or original RDF triples to STDOUT " +
                        "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', " +
                         "or 'n3') or to print a summary of the conflict set (from the RETE " +
                         "network) if the value of this option is 'conflict'.  If the the " +
                         " value is 'rif' or 'rif-xml', Then the rules used for inference " +
                         "will be serialized as RIF.  If the value is 'pml' and --why is used, " +
                         " then the PML RDF statements are serialized.  If output is " +
                         "'proof-graph then a graphviz .dot file of the proof graph is printed. " +
                         "Finally if the value is 'man-owl', then the RDF facts are assumed " +
                         "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default")

    op.add_option('--class',
                  dest='classes',
                  action='append',
                  default=[],
                  metavar='QNAME',
                  help='Used with --output=man-owl to determine which ' +
                         'classes within the entire OWL/RDF are targetted for serialization' +
                         '.  Can be used more than once')

    op.add_option('--hybrid',
                  action='store_true',
                  default=False,
                  help='Used with with --method=bfp to determine whether or not to ' +
                         'peek into the fact graph to identify predicates that are both ' +
                         'derived and base.  This is expensive for large fact graphs' +
                         'and is explicitely not used against SPARQL endpoints')

    op.add_option('--property',
                  action='append',
                  dest='properties',
                  default=[],
                  metavar='QNAME',
                  help='Used with --output=man-owl or --extract to determine which ' +
                         'properties are serialized / extracted.  Can be used more than once')

    op.add_option('--normalize',
                  action='store_true',
                  default=False,
                  help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]" +
                  "The default is %default")

    op.add_option('--ddlGraph',
                default=False,
                  help="The location of a N3 Data Description document describing the IDB predicates")

    op.add_option('--input-format',
                  default='xml',
                  dest='inputFormat',
                  metavar='RDF_FORMAT',
                  choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
                  help="The format of the RDF document(s) which serve as the initial facts " +
                         " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', " +
                         "or 'rdfa'.  The default is %default")

    op.add_option('--safety',
                  default='none',
                  metavar='RULE_SAFETY',
                  choices=['loose', 'strict', 'none'],
                  help="Determines how to handle RIF Core safety.  A value of 'loose' " +
                         " means that unsafe rules will be ignored.  A value of 'strict' " +
                         " will cause a syntax exception upon any unsafe rule.  A value of " +
                         "'none' (the default) does nothing")

    op.add_option('--pDSemantics',
                  action='store_true',
                  default=False,
                  help='Used with --dlp to add pD semantics ruleset for semantics not covered ' +
                  'by DLP but can be expressed in definite Datalog Logic Programming' +
                  ' The default is %default')

    op.add_option('--stdin',
                  action='store_true',
                  default=False,
                  help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')

    op.add_option('--ns',
                  action='append',
                  default=[],
                  metavar="PREFIX=URI",
                  help='Register a namespace binding (QName prefix to a base URI).  This ' +
                         'can be used more than once')

    op.add_option('--rules',
                  default=[],
                  action='append',
                  metavar='PATH_OR_URI',
                  help='The Notation 3 documents to use as rulesets for the RETE network' +
                  '.  Can be specified more than once')

    op.add_option('-d', '--debug', action='store_true', default=True,
                  help='Include debugging output')

    op.add_option('--strictness',
                  default='defaultBase',
                  metavar='DDL_STRICTNESS',
                  choices=['loose',
                             'defaultBase',
                             'defaultDerived',
                             'harsh'],
                  help='Used with --why to specify whether to: *not* check if predicates are ' +
                  ' both derived and base (loose), if they are, mark as derived (defaultDerived) ' +
                  'or as base (defaultBase) predicates, else raise an exception (harsh)')

    op.add_option('--method',
                  default='naive',
                  metavar='reasoning algorithm',
                  choices=['gms', 'bfp', 'naive'],
                  help='Used with --why to specify how to evaluate answers for query.  ' +
                  'One of: gms, sld, bfp, naive')

    op.add_option('--firstAnswer',
                  default=False,
                  action='store_true',
                  help='Used with --why to determine whether to fetch all answers or just ' +
                  'the first')

    op.add_option('--edb',
                  default=[],
                  action='append',
                  metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
                  help='Used with --why/--strictness=defaultDerived to specify which clashing ' +
                  'predicate will be designated as a base predicate')

    op.add_option('--idb',
                  default=[],
                  action='append',
                  metavar='INTENSIONAL_DB_PREDICATE_QNAME',
                  help='Used with --why/--strictness=defaultBase to specify which clashing ' +
                  'predicate will be designated as a derived predicate')

    op.add_option('--hybridPredicate',
                default=[],
                action='append',
                metavar='PREDICATE_QNAME',
                help='Used with --why to explicitely specify a hybrid predicate (in both ' +
                       ' IDB and EDB) ')

    op.add_option('--noMagic',
                  default=[],
                  action='append',
                  metavar='DB_PREDICATE_QNAME',
                  help='Used with --why to specify that the predicate shouldnt have its ' +
                  'magic sets calculated')

    op.add_option('--filter',
                  action='append',
                  default=[],
                  metavar='PATH_OR_URI',
                  help='The Notation 3 documents to use as a filter (entailments do not particpate in network)')

    op.add_option('--ruleFacts',
                  action='store_true',
                  default=False,
                  help="Determines whether or not to attempt to parse initial facts from " +
                  "the rule graph.  The default is %default")

    op.add_option('--builtins',
                  default=False,
                  metavar='PATH_TO_PYTHON_MODULE',
                  help="The path to a python module with function definitions (and a " +
                  "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations")

    op.add_option('--dlp',
                  action='store_true',
                  default=False,
                  help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default')

    op.add_option('--sparqlEndpoint',
                action='store_true',
                default=False,
                help='Indicates that the sole argument is the URI of a SPARQL endpoint to query')

    op.add_option('--ontology',
                  action='append',
                  default=[],
                  metavar='PATH_OR_URI',
                  help='The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
                  '(other wise, fact graph(s) are used)  ')

    op.add_option('--ontologyFormat',
                default='xml',
                dest='ontologyFormat',
                metavar='RDF_FORMAT',
                choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
                help="The format of the OWL RDF/XML graph specified via --ontology.  The default is %default")

    op.add_option('--builtinTemplates',
                  default=None,
                  metavar='N3_DOC_PATH_OR_URI',
                  help='The path to an N3 document associating SPARQL FILTER templates to ' +
                  'rule builtins')

    op.add_option('--negation',
                  action='store_true',
                  default=False,
                  help='Extract negative rules?')

    op.add_option('--normalForm',
                  action='store_true',
                  default=False,
                  help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    # Namespace bindings: a built-in default plus every --ns PREFIX=URI.
    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        # Split on the first '=' only so that URIs containing '=' are accepted.
        pref, nsUri = nsBind.split('=', 1)
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    # Load every rule document; optionally also parse it as a source of facts.
    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from ", fileN)
        if options.builtins:
            # NOTE(review): `imp` is deprecated and was removed in Python 3.12;
            # this needs an importlib-based replacement before upgrading.
            import imp
            userFuncs = imp.load_source('builtins', options.builtins)
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)

    ruleSet.nsMapping = nsBinds

    for prefix, uri in list(nsBinds.items()):
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. ", owlImport)

        if facts:
            for pref, uri in factGraph.namespaces():
                nsBinds[pref] = uri

    if options.stdin:
        assert not options.sparqlEndpoint, (
            "Cannot use --stdin with --sparqlEndpoint")
        factGraph.parse(sys.stdin, format=options.inputFormat)

    # Normalize namespace mappings
    # prune redundant, rdflib-allocated namespace prefix mappings
    new_ns_mgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in CollapseDictionary(dict(factGraph.namespaces())).items():
        new_ns_mgr.bind(k, v)
    factGraph.namespace_manager = new_ns_mgr

    if options.normalForm:
        # The --dlp branch below imports this name locally, which makes it a
        # function-local variable everywhere in main(); it must therefore be
        # imported here too, otherwise this call raises UnboundLocalError.
        from FuXi.DLP.DLNormalization import NormalFormReduction
        NormalFormReduction(factGraph)

    # NOTE(review): workingMemory stays unbound when --sparqlEndpoint is given,
    # yet it is read further down (why(...) / feedFactsToAdd) - confirm the
    # intended behaviour for endpoint runs.
    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if options.builtins:
        import imp
        userFuncs = imp.load_source('builtins', options.builtins)
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        from FuXi.DLP.DLNormalization import NormalFormReduction
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)
    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                pref, uri = c.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                pref, uri = p.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [sc for sc in c.subClassOf if sc.isPrimitive()]
                        if len(primAnc) > 1:
                            warnings.warn("Branches of primitive skeleton taxonomy" +
                              " should form trees: %s has %s primitive parents: %s" % (
                             c.qname, len(primAnc), primAnc), UserWarning, 1)
                        children = list(c.subSumpteeIds())
                        for child in children:
                            for otherChild in [o for o in children if o is not child]:
                                disjointIds = [d.identifier
                                               for d in Class(child).disjointWith]
                                if otherChild not in disjointIds:
                                    warnings.warn(
                                        "Primitive children (of %s) " % (c.qname) +
                                        "must be mutually disjoint: %s and %s" % (
                                            Class(child).qname,
                                            Class(otherChild).qname),
                                        UserWarning, 1)
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    if options.why:
        why(options, factGraph, network, nsBinds, ruleSet, workingMemory)
    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds" % sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milli seconds" % sTime
        print("Time to calculate closure on working memory: ", sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    if options.negation and network.negRules and options.method in [
            'both', 'bottomUp']:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print(
            "Time to calculate stratified, stable model"
            " (inferred %s facts): %s" % (rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(cGraph.serialize(destination=None,
                               format=options.output,
                               base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(network.inferredFacts.serialize(destination=None,
                                              format=options.output,
                                              base=None))
Exemple #15
0
def build_graph_and_post(reginfo_obj,
                         regitems_obj,
                         user=None,
                         passwd=None,
                         mode='single',
                         emitFile=False,
                         registry_auth_url=None,
                         updateOnlineRegisters=False,
                         verbose=False):
    """Build the RDF graph(s) for one or more registers and optionally publish them.

    Namespace prefixes are loaded from the packaged ``data/prefixes.ttl``
    file.  For every register a sub-register graph and a register graph are
    built; the register graph can be written to ``<sub_reg_id>.ttl`` and/or
    posted to the online registry.

    :param reginfo_obj: register metadata; in ``'single'`` mode a mapping with
        ``registry_location``, ``label`` and ``description`` entries, otherwise
        a mapping of register key to such a mapping
    :param regitems_obj: register items, indexed by sub-register id in
        ``'single'`` mode and by register key otherwise
    :param user: user name forwarded to the online-register update
    :param passwd: password forwarded to the online-register update
    :param mode: ``'single'`` for one register; any other value is treated as
        multi-register
    :param emitFile: write each register graph to a Turtle file
    :param registry_auth_url: authentication URL forwarded to the update
    :param updateOnlineRegisters: post the graphs to the online registry
    :param verbose: print progress and the serialized graphs
    :return: ``False`` when either input object equals ``False``; otherwise a
        tuple ``(g, status)`` where ``g`` is the last register graph built
        (``None`` if there was none) and ``status`` holds the flags
        ``didEmitFile``, ``didUpdateOnlineRegisters`` and ``isSuccessful``
    """
    if reginfo_obj == False or regitems_obj == False:
        return False

    ns_prefix_lookup = {
        "description": 'dct',
        "source": 'dct',
        "definition": 'skos',
        "broader": 'skos',
        "notation": 'reg',
        "note": 'skos',
        "altLabel": 'skos',
        "hiddenLabel": 'skos',
        "exactMatch": 'skos',
        "label": 'rdfs',
    }

    prefixes_g = rdflib.Graph()
    if verbose:
        print("Prefix file...")
        print(__file__)
    PREFIX_FILE = pkg_resources.resource_filename("ldrpyutils",
                                                  'data/prefixes.ttl')

    if verbose:
        print(PREFIX_FILE)
    with open(PREFIX_FILE) as f:
        prefixes_g.parse(f, format="ttl")
    nsMgr = NamespaceManager(prefixes_g)

    # prefix -> Namespace lookup for every binding found in the prefix file
    prefix_idx = {
        prefix: Namespace(namespace)
        for prefix, namespace in nsMgr.namespaces()
    }

    g = None
    status = {
        "didEmitFile": False,
        "didUpdateOnlineRegisters": False,
        "isSuccessful": False,
    }
    if mode == 'single':
        register_url = reginfo_obj['registry_location']
        reglabel = reginfo_obj['label']
        regdescription = reginfo_obj['description']

        (parent_reg_url, sub_reg_id
         ) = get_register_location_parent_and_subreg_url(register_url)
        subreg_graph = get_subregister_graph(sub_reg_id, reglabel,
                                             regdescription, prefix_idx, nsMgr)
        subreg_data = subreg_graph.serialize(None, format='turtle')
        if verbose:
            print("Outputting register graph for " + sub_reg_id)
            print(subreg_data)
        g = get_register_graph(sub_reg_id, reginfo_obj,
                               regitems_obj[sub_reg_id], nsMgr, prefix_idx,
                               ns_prefix_lookup)
        data = g.serialize(None, format='turtle')
        if verbose:
            print("Outputting graph for " + sub_reg_id)
            print(data)
        # In single mode the file is also written when only an online update
        # was requested.
        if emitFile or updateOnlineRegisters:
            filename = sub_reg_id + ".ttl"
            g.serialize(filename, format="turtle")
            status['didEmitFile'] = True
            if updateOnlineRegisters:
                resFlag = post_update_to_online_register(
                    sub_reg_id,
                    parent_reg_url,
                    register_url,
                    data,
                    subreg_data,
                    registry_auth_url=registry_auth_url,
                    user=user,
                    passwd=passwd,
                    verbose=verbose)
                status['didUpdateOnlineRegisters'] = resFlag
                if resFlag == False:
                    status['isSuccessful'] = False
                else:
                    status['isSuccessful'] = True

    else:
        # assume multi register; the status flags reflect the last register
        # processed
        for key in reginfo_obj:
            register_url = reginfo_obj[key]['registry_location']
            reglabel = reginfo_obj[key]['label']
            regdescription = reginfo_obj[key]['description']

            (parent_reg_url, sub_reg_id
             ) = get_register_location_parent_and_subreg_url(register_url)
            subreg_graph = get_subregister_graph(sub_reg_id, reglabel,
                                                 regdescription, prefix_idx,
                                                 nsMgr)
            subreg_data = subreg_graph.serialize(None, format='turtle')

            g = get_register_graph(sub_reg_id, reginfo_obj[key],
                                   regitems_obj[key], nsMgr, prefix_idx,
                                   ns_prefix_lookup)
            data = g.serialize(format='turtle')
            if verbose:
                print(data)
            if emitFile:
                filename = sub_reg_id + ".ttl"
                g.serialize(filename, format="turtle")
                # Only report a file as emitted when one was actually written
                # (previously this flag was set unconditionally).
                status['didEmitFile'] = True
            if updateOnlineRegisters:
                resFlag = post_update_to_online_register(
                    sub_reg_id,
                    parent_reg_url,
                    register_url,
                    data,
                    subreg_data,
                    registry_auth_url=registry_auth_url,
                    user=user,
                    passwd=passwd,
                    verbose=verbose)
                status['didUpdateOnlineRegisters'] = resFlag
                if resFlag == False:
                    status['isSuccessful'] = False
                else:
                    status['isSuccessful'] = True

    return (g, status)
Exemple #16
0
def from_n3(s, default=None, backend=None, nsm=None):
    r'''
    Build the rdflib term described by the N3 string ``s``.

    The first character(s) of ``s`` decide what is produced: ``<...>`` gives a
    URIRef, a quoted string gives a Literal (with optional ``@lang`` or
    ``^^datatype``), ``true``/``false`` and plain digits give typed Literals,
    ``{...}`` and ``[...]`` give graphs on ``backend``, ``_:`` gives a BNode
    and ``prefix:name`` is expanded through ``nsm``.  Anything else becomes a
    BNode named after ``s``.  An empty ``s`` yields ``default``.

        >>> from_n3('<http://ex.com/foo>') == URIRef('http://ex.com/foo')
        True
        >>> from_n3('"foo"@de') == Literal('foo', lang='de')
        True
        >>> from_n3('"""multi\nline\nstring"""@en') == Literal(
        ...     'multi\nline\nstring', lang='en')
        True
        >>> from_n3('42') == Literal(42)
        True
        >>> from_n3(Literal(42).n3()) == Literal(42)
        True
        >>> from_n3('"42"^^xsd:integer') == Literal(42)
        True
        >>> from rdflib import RDFS
        >>> from_n3('rdfs:label') == RDFS['label']
        True
        >>> nsm = NamespaceManager(Graph())
        >>> nsm.bind('dbpedia', 'http://dbpedia.org/resource/')
        >>> berlin = URIRef('http://dbpedia.org/resource/Berlin')
        >>> from_n3('dbpedia:Berlin', nsm=nsm) == berlin
        True

    '''
    if not s:
        return default

    if s.startswith("<"):
        return URIRef(s[1:-1])

    if s.startswith('"'):
        delimiter = '"""' if s.startswith('"""') else '"'
        lexical, suffix = s.rsplit(delimiter, 1)
        lexical = lexical[len(delimiter):]  # drop the opening quotes
        datatype = None
        language = None

        # A datatype takes precedence over a language tag, so look for it
        # first; it must follow any language tag, hence the search from the
        # right.  See http://www.w3.org/TR/2011/WD-turtle-20110809/
        # #prod-turtle2-RDFLiteral
        marker = suffix.rfind("^^")
        if marker >= 0:
            datatype = from_n3(suffix[marker + 2:], default, backend, nsm)
        elif suffix.startswith("@"):
            language = suffix[1:]  # drop the leading at sign

        lexical = lexical.replace(r"\"", '"')
        # Hack: round-trip through raw-unicode-escape so that both native
        # unicode characters and \u1234 escapes end up decoded.
        lexical = lexical.encode("raw-unicode-escape").decode("unicode-escape")
        return Literal(lexical, language, datatype)

    if s == "true" or s == "false":
        return Literal(s == "true")

    if s.isdigit():
        return Literal(int(s))

    if s.startswith("{"):
        return QuotedGraph(backend, from_n3(s[1:-1]))

    if s.startswith("["):
        return Graph(backend, from_n3(s[1:-1]))

    if s.startswith("_:"):
        return BNode(s[2:])

    if ":" in s:
        if nsm is None:
            # fall back to a fresh NamespaceManager and its default bindings
            nsm = NamespaceManager(Graph())
        prefix, local_part = s.split(":", 1)
        base = dict(nsm.namespaces())[prefix]
        return Namespace(base)[local_part]

    return BNode(s)
Exemple #17
0
def from_n3(s, default=None, backend=None, nsm=None):
    r'''
    Build the rdflib term described by the N3 string ``s``.

    The first character(s) of ``s`` decide what is produced: ``<...>`` gives a
    URIRef (with unicode escapes decoded), a quoted string gives a Literal
    (with optional ``@lang`` or ``^^datatype``), ``true``/``false`` and plain
    digits give typed Literals, ``{...}`` and ``[...]`` give graphs on
    ``backend``, ``_:`` gives a BNode and ``prefix:name`` is expanded through
    ``nsm``.  Anything else becomes a BNode named after ``s``.  An empty ``s``
    yields ``default``.

        >>> from_n3('<http://ex.com/foo>') == URIRef('http://ex.com/foo')
        True
        >>> from_n3('"foo"@de') == Literal('foo', lang='de')
        True
        >>> from_n3('"""multi\nline\nstring"""@en') == Literal(
        ...     'multi\nline\nstring', lang='en')
        True
        >>> from_n3('42') == Literal(42)
        True
        >>> from_n3(Literal(42).n3()) == Literal(42)
        True
        >>> from_n3('"42"^^xsd:integer') == Literal(42)
        True
        >>> from rdflib import RDFS
        >>> from_n3('rdfs:label') == RDFS['label']
        True
        >>> nsm = NamespaceManager(Graph())
        >>> nsm.bind('dbpedia', 'http://dbpedia.org/resource/')
        >>> berlin = URIRef('http://dbpedia.org/resource/Berlin')
        >>> from_n3('dbpedia:Berlin', nsm=nsm) == berlin
        True

    '''
    if not s:
        return default

    if s.startswith('<'):
        # Hack: round-trip through raw-unicode-escape so that both native
        # unicode characters and \u1234 escapes end up decoded.
        iri_text = s[1:-1]
        return URIRef(
            iri_text.encode("raw-unicode-escape").decode("unicode-escape"))

    if s.startswith('"'):
        delimiter = '"""' if s.startswith('"""') else '"'
        lexical, suffix = s.rsplit(delimiter, 1)
        lexical = lexical[len(delimiter):]  # drop the opening quotes
        datatype = None
        language = None

        # A datatype takes precedence over a language tag, so look for it
        # first; it must follow any language tag, hence the search from the
        # right.  See http://www.w3.org/TR/2011/WD-turtle-20110809/
        # #prod-turtle2-RDFLiteral
        marker = suffix.rfind('^^')
        if marker >= 0:
            datatype = from_n3(suffix[marker + 2:], default, backend, nsm)
        elif suffix.startswith("@"):
            language = suffix[1:]  # drop the leading at sign

        lexical = lexical.replace(r'\"', '"')
        # Same escape-decoding hack as for IRIs above.
        lexical = lexical.encode("raw-unicode-escape").decode("unicode-escape")
        return Literal(lexical, language, datatype)

    if s == 'true' or s == 'false':
        return Literal(s == 'true')

    if s.isdigit():
        return Literal(int(s))

    if s.startswith('{'):
        return QuotedGraph(backend, from_n3(s[1:-1]))

    if s.startswith('['):
        return Graph(backend, from_n3(s[1:-1]))

    if s.startswith("_:"):
        return BNode(s[2:])

    if ':' in s:
        if nsm is None:
            # fall back to a fresh NamespaceManager and its default bindings
            nsm = NamespaceManager(Graph())
        prefix, local_part = s.split(':', 1)
        base = dict(nsm.namespaces())[prefix]
        return Namespace(base)[local_part]

    return BNode(s)
Exemple #18
0
class OdgiStore(Store):
    """\
    An in-memory implementation of a read-only ODGI store.

    It uses the disk-based odgi/handlegraph as its backing store.
    
    Authors: Jerven Bolleman
    """
    def __init__(self, configuration=None, identifier=None, base=None):
        """
        Set up namespace bindings and the IRI bases used by this store.

        :param configuration: forwarded to the parent ``__init__`` and kept as
            ``self.configuration``
        :param identifier: kept as ``self.identifier``
        :param base: IRI prefix from which the ``path/`` and ``step/``
            namespaces are derived; ``'http://example.org/vg/'`` when omitted
        """
        super(OdgiStore, self).__init__(configuration)
        self.namespace_manager = NamespaceManager(Graph())
        self.bind('vg', VG)
        self.bind('faldo', FALDO)
        self.identifier = identifier
        self.configuration = configuration
        # Identity test: only a genuinely missing base selects the default.
        self.base = 'http://example.org/vg/' if base is None else base
        self.pathNS = Namespace(f'{self.base}path/')
        self.stepNS = Namespace(f'{self.base}step/')
        self.bind('path', self.pathNS)
        self.bind('step', self.stepNS)

    def open(self, odgifile, create=False):
        """
        Load an odgi graph from ``odgifile`` and keep it as ``self.odgi``.

        :param odgifile: passed unchanged to the ``load`` method of a fresh
            ``odgi.graph()``
        :param create: accepted but not used by this implementation
        """
        og = odgi.graph()
        # The value returned by load() was never used, so it is not bound.
        og.load(odgifile)
        self.odgi = og

    def triples(self, triple_pattern, context=None):
        """
        Return a generator over all the triples matching ``triple_pattern``.

        The pattern is routed to a specialised generator according to its
        predicate; when the predicate gives no hint, the layout of the subject
        IRI (``.../node/<id>``, ``.../path/<name>/step/<rank>``,
        ``.../path/<name>``) decides.  A subject matching none of these
        layouts produces an empty generator.

        :param triple_pattern: ``(subject, predicate, object)``; ``ANY`` is the
            wildcard in each position
        :param context: accepted but not used by this implementation
        :return: the iterator produced by the selected generator
        """
        subject, predicate, obj = triple_pattern
        if RDF.type == predicate and obj != ANY:
            return self.typeTriples(subject, predicate, obj)
        elif (predicate in nodeRelatedPredicates):
            return self.nodes(subject, predicate, obj)
        elif predicate in stepAssociatedPredicates:
            return self.steps(subject, predicate, obj)
        elif RDFS.label == predicate:
            return self.paths(subject, predicate, obj)
        elif subject == ANY and predicate == ANY and obj == ANY:
            return chain(self.__allPredicates(), self.__allTypes())
        elif subject != ANY:
            subjectIriParts = subject.toPython().split('/')
            # Left-pad with None so the negative indexes used below are valid
            # even for IRIs with fewer than four '/'-separated parts; such
            # IRIs then simply match none of the layouts.
            missing = 4 - len(subjectIriParts)
            if missing > 0:
                subjectIriParts = [None] * missing + subjectIriParts

            # A node IRI must end in an integer id; anything else is treated
            # as "not a node" rather than raising ValueError.
            nodeId = None
            if 'node' == subjectIriParts[-2]:
                try:
                    nodeId = int(subjectIriParts[-1])
                except ValueError:
                    nodeId = None

            if nodeId is not None and self.odgi.has_node(nodeId):
                handle = self.odgi.get_handle(nodeId)
                return chain(
                    self.handleToTriples(predicate, obj, handle),
                    self.handleToEdgeTriples(subject, predicate, obj, handle))
            elif 'path' == subjectIriParts[-4] and 'step' == subjectIriParts[
                    -2]:
                return self.steps(subject, predicate, obj)
            elif 'path' == subjectIriParts[-2]:
                return self.paths(subject, predicate, obj)
            elif type(subject) == StepBeginIriRef or type(
                    subject) == StepEndIriRef:
                return self.steps(subject, predicate, obj)
            else:
                return self.__emptygen()
        else:
            return self.__emptygen()

    #For the known types we can shortcut evaluation in many cases
    def typeTriples(self, subject, predicate, obj):
        """
        Route an ``rdf:type`` pattern with a concrete object type.

        ``VG.Node`` goes to :meth:`nodes`, ``VG.Path`` to :meth:`paths`, and
        any member of ``stepAssociatedTypes`` to :meth:`steps`.  Any other
        type yields nothing.
        """
        if VG.Node == obj:
            producer = self.nodes
        elif VG.Path == obj:
            producer = self.paths
        elif obj in stepAssociatedTypes:
            producer = self.steps
        else:
            # Unknown type: nothing in this store can match.
            return self.__emptygen()
        return producer(subject, predicate, obj)

    def __allTypes(self):
        """Yield the ``rdf:type`` triples for every entry of ``knownTypes``."""
        for rdfType in knownTypes:
            typePattern = (ANY, RDF.type, rdfType)
            yield from self.triples(typePattern)

    def __allPredicates(self):
        """Yield the triples for every predicate listed in ``knownPredicates``."""
        for knownPredicate in knownPredicates:
            predicatePattern = (ANY, knownPredicate, ANY)
            yield from self.triples(predicatePattern)

    def __emptygen(self):
        """Return a generator that produces no items."""
        return
        # Unreachable, but its presence is what makes this a generator function.
        yield

    def nodes(self, subject, predicate, obj):
        """
        Generate node-related triples for the given pattern.

        With a concrete subject, the result is either the single
        ``rdf:type vg:Node`` triple (when that is what the pattern asks for and
        the subject is a node of the graph) or the value/type and edge triples
        of that node.  With a wildcard subject, every handle returned by
        :meth:`handles` is expanded.

        Each item yielded is a ``[(s, p, o), None]`` list.
        """
        if subject != ANY:
            inGraph = self.isNodeIriInGraph(subject)
            asksForNodeType = (predicate == RDF.type
                               or predicate == ANY) and obj == VG.Node

            if asksForNodeType and inGraph:
                yield [(subject, RDF.type, VG.Node), None]
            elif type(subject) == NodeIriRef:
                # The handle is already carried by the IRI object.
                yield from self.handleToTriples(predicate, obj,
                                                subject._nodeHandle)
                yield from self.handleToEdgeTriples(subject, predicate, obj,
                                                    subject._nodeHandle)
            elif inGraph:
                # Plain IRI: the node id is the last path segment.
                nodeId = int(subject.toPython().split('/')[-1])
                handle = self.odgi.get_handle(nodeId)
                yield from self.handleToTriples(predicate, obj, handle)
                yield from self.handleToEdgeTriples(subject, predicate, obj,
                                                    handle)
            else:
                return self.__emptygen()
        else:
            for nodeHandle in self.handles():
                yield from self.handleToEdgeTriples(subject, predicate, obj,
                                                    nodeHandle)
                yield from self.handleToTriples(predicate, obj, nodeHandle)

    def isNodeIriInGraph(self, iri):
        """
        Tell whether ``iri`` identifies a node of the loaded graph.

        A ``NodeIriRef`` is accepted as-is.  Any other term is accepted only
        when its text ends in ``node/<integer>`` and ``self.odgi.has_node``
        confirms that id.

        :param iri: term offering ``toPython()``
        :return: ``True`` for a known node; ``False`` otherwise, including for
            IRIs that are too short or whose last segment is not an integer
        """
        if (type(iri) == NodeIriRef):
            return True

        iriParts = iri.toPython().split('/')
        # Fewer than two segments cannot match '.../node/<id>'; answer False
        # instead of raising IndexError.
        if len(iriParts) < 2 or 'node' != iriParts[-2]:
            return False
        try:
            nodeId = int(iriParts[-1])
        except ValueError:
            # A non-numeric id cannot name a node.
            return False
        return self.odgi.has_node(nodeId)

    def paths(self, subject, predicate, obj):
        """
        Yield the path triples matching the pattern.

        A ``PathToTriples`` visitor is run over every path handle of the graph
        and fills a list; the collected items are then yielded in order.
        """
        collected = []
        visitor = PathToTriples(self.odgi, self.pathNS, subject, predicate,
                                obj, collected)
        self.odgi.for_each_path_handle(visitor)
        yield from collected

    def steps(self, subject, predicate, obj):
        """
        Generate step-related triples for the given pattern.

        * Wildcard subject: every non-empty path is walked from its first
          step; ``rank`` starts at 1 and grows by one per step, ``position``
          starts at 1 and grows by the length of the node just left.
        * ``StepIriRef`` / ``StepBeginIriRef`` / ``StepEndIriRef`` subject:
          handle, rank and position are taken from the subject itself.
        * Any other subject: its text must look like
          ``.../path/<name>/step/<rank>``; the named path is walked until that
          rank (or the end of the path) is reached.  Subjects with another
          layout, or a non-integer rank, yield nothing.

        All triples are produced by :meth:`stepHandleToTriples`.
        """
        if (subject == ANY):
            for pathHandle in self.pathHandles():
                if self.odgi.is_empty(pathHandle):
                    continue
                rank = 1
                position = 1
                stepHandle = self.odgi.path_begin(pathHandle)
                nodeHandle = self.odgi.get_handle_of_step(stepHandle)
                while True:
                    yield from self.stepHandleToTriples(stepHandle,
                                                        subject,
                                                        predicate,
                                                        obj,
                                                        nodeHandle=nodeHandle,
                                                        rank=rank,
                                                        position=position)
                    if not self.odgi.has_next_step(stepHandle):
                        break
                    stepHandle = self.odgi.get_next_step(stepHandle)
                    # nodeHandle still refers to the step just left, so this
                    # adds the previous node's length.
                    position = position + self.odgi.get_length(nodeHandle)
                    nodeHandle = self.odgi.get_handle_of_step(stepHandle)
                    rank = rank + 1
        elif type(subject) in (StepIriRef, StepBeginIriRef, StepEndIriRef):
            # The three step-flavoured IRI types were handled by identical
            # branches; they are merged here.
            yield from self.stepHandleToTriples(subject.stepHandle(),
                                                subject,
                                                predicate,
                                                obj,
                                                rank=subject.rank(),
                                                position=subject.position())
        else:
            subjectIriParts = subject.toPython().split('/')
            # Too few segments cannot match '.../path/<name>/step/<rank>';
            # produce nothing instead of raising IndexError.
            if len(subjectIriParts) < 4:
                return
            if not ('path' == subjectIriParts[-4]
                    and 'step' == subjectIriParts[-2]):
                return
            try:
                stepRank = int(subjectIriParts[-1])
            except ValueError:
                # A non-numeric rank cannot identify a step.
                return

            pathName = subjectIriParts[-3]
            pathHandle = self.odgi.get_path_handle(pathName)

            if not self.odgi.is_empty(pathHandle):
                rank = 1
                position = 1
                stepHandle = self.odgi.path_begin(pathHandle)
                nodeHandle = self.odgi.get_handle_of_step(stepHandle)
                while rank != stepRank and self.odgi.has_next_step(
                        stepHandle):
                    rank = rank + 1
                    position = position + self.odgi.get_length(nodeHandle)
                    stepHandle = self.odgi.get_next_step(stepHandle)
                    nodeHandle = self.odgi.get_handle_of_step(stepHandle)
                yield from self.stepHandleToTriples(stepHandle,
                                                    subject,
                                                    predicate,
                                                    obj,
                                                    nodeHandle=nodeHandle,
                                                    rank=rank,
                                                    position=position)

    #else:
    #for nodeHandle in self.handles():
    #for stepHandle in self.odgi.steps_of_handle(nodeHandle, False):
    #yield from self.stepHandleToTriples(stepHandle, subject, predicate, obj, nodeHandle=nodeHandle)

    def stepHandleToTriples(self,
                            stepHandle,
                            subject,
                            predicate,
                            obj,
                            nodeHandle=None,
                            rank=None,
                            position=None):
        """
        Yield the triples describing one step, filtered by the pattern.

        :param stepHandle: odgi step handle being described
        :param subject: pattern subject; ``ANY``, a step IRI, or a step
            begin/end IRI
        :param predicate: pattern predicate, ``ANY`` for a wildcard
        :param obj: pattern object, ``ANY`` for a wildcard
        :param nodeHandle: handle of the node the step visits; looked up from
            ``stepHandle`` when omitted
        :param rank: rank of the step in its path; no ``vg:rank`` triple is
            produced when omitted
        :param position: position of the step in its path; no ``vg:position``
            triple is produced when omitted
        :return: generator of ``[(s, p, o), None]`` items
        """
        if (type(subject) == StepIriRef):
            stepIri = subject
        elif (type(subject) == StepBeginIriRef
              or type(subject) == StepEndIriRef):
            stepIri = subject._stepIri
        else:
            stepIri = StepIriRef(stepHandle, self.base, self.odgi, position,
                                 rank)

        if (subject == ANY or stepIri == subject):
            if (predicate == RDF.type or predicate == ANY):
                if (obj == ANY or obj == VG.Step):
                    yield ([(stepIri, RDF.type, VG.Step), None])
                if (obj == ANY or obj == FALDO.Region):
                    yield ([(stepIri, RDF.type, FALDO.Region), None])
            if nodeHandle is None:
                nodeHandle = self.odgi.get_handle_of_step(stepHandle)
            nodeIri = NodeIriRef(nodeHandle, odgi=self.odgi, base=self.base)
            objMatchesNode = obj == ANY or nodeIri == obj

            # The orientation test must apply to an explicit predicate as well
            # as to the wildcard.  Previously `and` bound tighter than `or`,
            # so an explicit vg:node / vg:reverseOfNode pattern ignored the
            # orientation of the node.
            if (predicate == VG.node or predicate
                    == ANY) and objMatchesNode and not self.odgi.get_is_reverse(
                        nodeHandle):
                yield ([(stepIri, VG.node, nodeIri), None])

            if (predicate == VG.reverseOfNode or predicate
                    == ANY) and objMatchesNode and self.odgi.get_is_reverse(
                        nodeHandle):
                yield ([(stepIri, VG.reverseOfNode, nodeIri), None])

            if (predicate == VG.rank
                    or predicate == ANY) and rank is not None:
                rankLiteral = Literal(rank)
                if obj == ANY or obj == rankLiteral:
                    yield ([(stepIri, VG.rank, rankLiteral), None])

            if (predicate == VG.position
                    or predicate == ANY) and position is not None:
                positionLiteral = Literal(position)
                if obj == ANY or positionLiteral == obj:
                    yield ([(stepIri, VG.position, positionLiteral), None])

            if (predicate == VG.path or predicate == ANY):
                path = self.odgi.get_path_handle_of_step(stepHandle)
                pathName = self.odgi.get_path_name(path)

                pathIri = self.pathNS.term(f'{pathName}')
                if obj == ANY or pathIri == obj:
                    yield ([(stepIri, VG.path, pathIri), None])

            # faldo:begin / faldo:end now honour the object filter like every
            # other predicate here; before, they were emitted for any object.
            if (predicate == ANY or predicate == FALDO.begin):
                beginIri = StepBeginIriRef(stepIri)
                if obj == ANY or beginIri == obj:
                    yield ([(stepIri, FALDO.begin, beginIri), None])

            if (predicate == ANY or predicate == FALDO.end):
                endIri = StepEndIriRef(stepIri)
                if obj == ANY or endIri == obj:
                    yield ([(stepIri, FALDO.end, endIri), None])

            if (subject == ANY):
                # With a wildcard subject the begin/end position resources are
                # subjects in their own right.
                begin = StepBeginIriRef(stepIri)
                yield from self.faldoForStep(stepIri, begin, predicate, obj)
                end = StepEndIriRef(stepIri)
                yield from self.faldoForStep(stepIri, end, predicate, obj)

        if (type(subject) == StepBeginIriRef) and stepIri == subject._stepIri:
            yield from self.faldoForStep(subject._stepIri, subject, predicate,
                                         obj)
        elif (type(subject) == StepEndIriRef and stepIri == subject._stepIri):
            yield from self.faldoForStep(subject._stepIri, subject, predicate,
                                         obj)

    def faldoForStep(self, stepIri, subject, predicate, obj):
        """
        Yield the FALDO triples of a step begin/end position resource.

        In order: ``faldo:position``, the two ``rdf:type`` triples
        (``faldo:ExactPosition`` then ``faldo:Position``) and
        ``faldo:reference`` pointing at the IRI of the step's path; each one
        only when it matches ``predicate`` and ``obj``.

        :param stepIri: step IRI object whose ``path()`` gives the path handle
        :param subject: position resource; its ``position()`` gives the value
        """
        positionValue = Literal(subject.position())
        anyPredicate = predicate == ANY
        anyObject = obj == ANY

        if (anyPredicate or predicate == FALDO.position) and (
                anyObject or obj == positionValue):
            yield ([(subject, FALDO.position, positionValue), None])

        if anyPredicate or predicate == RDF.type:
            for faldoType in (FALDO.ExactPosition, FALDO.Position):
                if anyObject or obj == faldoType:
                    yield ([(subject, RDF.type, faldoType), None])

        if anyPredicate or predicate == FALDO.reference:
            referenceName = self.odgi.get_path_name(stepIri.path())
            referenceIri = self.pathNS.term(f'{referenceName}')
            if anyObject or obj == referenceIri:
                yield ([(subject, FALDO.reference, referenceIri), None])

    def handleToTriples(self, predicate, obj, nodeHandle):
        """
        Yield the ``rdf:value`` and ``rdf:type`` triples of one node.

        :param predicate: pattern predicate, ``ANY`` for a wildcard
        :param obj: pattern object, ``ANY`` for a wildcard
        :param nodeHandle: odgi handle of the node to describe
        :return: generator of ``[(s, p, o), None]`` items
        """
        nodeIri = NodeIriRef(nodeHandle, odgi=self.odgi, base=self.base)

        if (predicate == RDF.value or predicate == ANY):
            seqValue = rdflib.term.Literal(self.odgi.get_sequence(nodeHandle))
            if (obj == ANY or obj == seqValue):
                yield [(nodeIri, RDF.value, seqValue), None]
        # Independent `if`, not `elif`: a wildcard predicate matches both
        # branches, and the type triple used to be skipped in that case.
        if (predicate == RDF.type
                or predicate == ANY) and (obj == ANY or obj == VG.Node):
            yield [(nodeIri, RDF.type, VG.Node), None]

    def handleToEdgeTriples(self, subject, predicate, obj, nodeHandle):
        """
        Yield the link triples for the edges followed from ``nodeHandle``.

        For each target collected by ``follow_edges`` whose IRI matches
        ``obj``, one orientation-specific link triple is produced (chosen from
        the reverse flags of both handles), followed by the generic
        ``vg:links`` triple, each subject to the predicate filter.

        :param subject: not used; the subject IRI is rebuilt from
            ``nodeHandle``
        :param predicate: pattern predicate, ``ANY`` for a wildcard
        :param obj: pattern object, ``ANY`` for a wildcard
        :param nodeHandle: odgi handle whose edges are followed
        :return: generator of ``[(s, p, o), None]`` items
        """
        if not (predicate == ANY or (predicate in nodeRelatedPredicates)):
            return

        toNodeHandles = []
        self.odgi.follow_edges(nodeHandle, False,
                               CollectEdges(toNodeHandles))
        nodeIri = NodeIriRef(nodeHandle, odgi=self.odgi, base=self.base)
        # The source orientation is the same for every edge; read it once.
        nodeIsReverse = bool(self.odgi.get_is_reverse(nodeHandle))

        # (source is reverse, target is reverse) -> link predicate.  One table
        # serves both the filter and the emitted triple, so they cannot
        # disagree; the forward-to-reverse case used to be filtered against
        # linksReverseToReverse.
        #TODO: check the logic here
        orientedLinks = {
            (False, False): VG.linksForwardToForward,
            (True, False): VG.linksReverseToForward,
            (True, True): VG.linksReverseToReverse,
            (False, True): VG.linksForwardToReverse,
        }

        for edge in toNodeHandles:
            otherIri = NodeIriRef(edge, odgi=self.odgi, base=self.base)
            if not (obj == ANY or otherIri == obj):
                continue

            otherIsReverse = bool(self.odgi.get_is_reverse(edge))
            orientedLink = orientedLinks[(nodeIsReverse, otherIsReverse)]
            if predicate == ANY or orientedLink == predicate:
                yield ([(nodeIri, orientedLink, otherIri), None])
            if (predicate == ANY or VG.links == predicate):
                yield ([(nodeIri, VG.links, otherIri), None])

    def bind(self, prefix, namespace):
        """Register ``prefix`` for ``namespace`` on this store's namespace manager."""
        manager = self.namespace_manager
        manager.bind(prefix, namespace)

    def namespace(self, searchPrefix):
        """Return the namespace bound to ``searchPrefix``, or ``None`` if there is none."""
        bindings = self.namespace_manager.namespaces()
        return next(
            (boundNamespace for boundPrefix, boundNamespace in bindings
             if searchPrefix == boundPrefix),
            None,
        )

    def prefix(self, searchNamespace):
        """Return the prefix bound to ``searchNamespace``, or ``None`` if there is none."""
        bindings = self.namespace_manager.namespaces()
        return next(
            (boundPrefix for boundPrefix, boundNamespace in bindings
             if searchNamespace == boundNamespace),
            None,
        )

    def namespaces(self):
        """Return the ``(prefix, namespace)`` bindings of the namespace manager."""
        manager = self.namespace_manager
        return manager.namespaces()

    def handles(self):
        """
        Yield a handle for every node id between the graph's minimum and
        maximum node id (inclusive) that ``has_node`` confirms.

        Ids missing from that range are skipped.  The counter used to advance
        only when the node existed, so the first missing id caused an endless
        loop.
        """
        nodeId = self.odgi.min_node_id()
        maxNodeId = self.odgi.max_node_id()
        while nodeId <= maxNodeId:
            if self.odgi.has_node(nodeId):
                yield self.odgi.get_handle(nodeId)
            # Advance unconditionally so gaps in the id range are stepped over.
            nodeId = nodeId + 1

    def pathHandles(self):
        """Yield every path handle gathered by a ``CollectPaths`` visitor."""
        collected = []
        collector = CollectPaths(collected)
        self.odgi.for_each_path_handle(collector)
        yield from collected