class NSManager:

    def __init__(self, ns_dict):
        """ TODO: check ns_dict """
        self._ns_dict = ns_dict
        self._rdflib_ns_manager = None

    def __getitem__(self, key):
        return self._ns_dict[key]

    def __getattr__(self, key):
        try:
            return self._ns_dict[key]
        except KeyError:
            raise AttributeError(key)

    def add_namespace(self, prefix, namespace):
        """ TODO: check prefix and namespace """
        if prefix in self._ns_dict:  # dict.has_key() was removed in Python 3
            raise AlreadyRegisteredNSError(prefix)
        self._ns_dict[prefix] = namespace

    @property
    def ns_dict(self):
        return self._ns_dict

    @property
    def rdflib_ns_manager(self):
        """ For using prefixes in RDFLib graphs """
        if self._rdflib_ns_manager is None:
            self._rdflib_ns_manager = NamespaceManager(Graph())
            for prefix in self._ns_dict:
                self._rdflib_ns_manager.bind(prefix, self._ns_dict[prefix])
        return self._rdflib_ns_manager
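# Usage sketch for the class above; the namespace URIs are illustrative
# assumptions, and AlreadyRegisteredNSError comes from the surrounding module.
from rdflib import Namespace

ns = NSManager({"foaf": Namespace("http://xmlns.com/foaf/0.1/")})
ns["foaf"]            # dict-style lookup
ns.foaf               # attribute-style lookup via __getattr__
ns.add_namespace("dc", Namespace("http://purl.org/dc/elements/1.1/"))
ns.rdflib_ns_manager  # lazily built rdflib NamespaceManager with all bindings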
def canonicalTerm(self, term):
    if isinstance(term, URIRef):
        if self.prolog is not None:
            namespace_manager = NamespaceManager(Graph())
            for prefix, uri in self.prolog.prefixBindings.items():
                namespace_manager.bind(prefix, uri, override=False)
            try:
                prefix, uri, localName = namespace_manager.compute_qname(term)
            except Exception:
                return term
            if prefix not in self.prolog.prefixBindings:
                return term
            else:
                return u':'.join([prefix, localName])
        else:
            return term
    elif isinstance(term, Literal):
        return term.n3()
    elif isinstance(term, BNode):
        return term.n3()
    else:
        assert isinstance(term, Variable)
        return term.n3()
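# Minimal sketch of what compute_qname returns, which is what canonicalTerm
# relies on; the 'ex' prefix is an assumption for illustration.
from rdflib import Graph, URIRef, Namespace
from rdflib.namespace import NamespaceManager

nm = NamespaceManager(Graph())
nm.bind("ex", Namespace("http://example.com/"))
prefix, uri, local = nm.compute_qname(URIRef("http://example.com/Thing"))
# prefix == "ex", uri == URIRef("http://example.com/"), local == "Thing"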
def serialize_graph(request, rdfjson, base):
    editgraph = Graph()
    editgraph.parse(data=rdfjson, format="rdf-json")

    namespace_manager = NamespaceManager(Graph())
    for ns in namespaces_dict:
        namespace_manager.bind(ns, Namespace(namespaces_dict[ns]), override=False)
    editgraph.namespace_manager = namespace_manager

    if base:
        # The RDFLib mechanism for inserting the base during serialization,
        #     graphxml_string = editgraph.serialize(format="pretty-xml", base=base)
        # is buggy, so the base is inserted manually instead.
        graphxml_string = editgraph.serialize(format="pretty-xml").decode('utf-8', 'ignore')
        graphxml_string = graphxml_string.replace(
            'rdf:RDF\n', 'rdf:RDF\n xml:base="' + base + '"\n')
    else:
        graphxml_string = editgraph.serialize(format="pretty-xml")

    graphxml_to_db = RDF_XML(rdfxml_string=graphxml_string)
    graphxml_to_db.save()
    print(graphxml_to_db.id)

    return json.dumps({'message': graphxml_to_db.id})
def init_database(self):
    """ Open the configured database """
    self._init_rdf_graph()
    L.debug("opening " + str(self.source))
    try:
        self.source.open()
    except OpenFailError as e:
        L.error('Failed to open the data source because: %s', e)
        raise
    nm = NamespaceManager(self['rdf.graph'])
    self['rdf.namespace_manager'] = nm
    self['rdf.graph'].namespace_manager = nm

    # A runtime version number for the graph; it should update for all
    # changes to the graph
    self['rdf.graph.change_counter'] = 0

    self['rdf.graph'].store.dispatcher.subscribe(TripleAddedEvent, self._context_changed_handler())
    self['rdf.graph'].store.dispatcher.subscribe(TripleRemovedEvent, self._context_changed_handler())

    self['rdf.graph']._add = self['rdf.graph'].add
    self['rdf.graph']._remove = self['rdf.graph'].remove
    self['rdf.graph'].add = self._my_graph_add
    self['rdf.graph'].remove = self._my_graph_remove
    nm.bind("", self['rdf.namespace'])
def __init__(self, ruleStore, name=None,
             initialWorkingMemory=None,
             inferredTarget=None,
             nsMap={},
             graphVizOutFile=None,
             dontFinalize=False,
             goal=None):
    self.leanCheck = {}
    self.goal = goal
    self.nsMap = nsMap
    self.name = name and name or BNode()
    self.nodes = {}
    self.alphaPatternHash = {}
    self.ruleSet = set()
    for alphaPattern in xcombine(('1', '0'), ('1', '0'), ('1', '0')):
        self.alphaPatternHash[tuple(alphaPattern)] = {}
    if inferredTarget is None:
        self.inferredFacts = Graph()
        namespace_manager = NamespaceManager(self.inferredFacts)
        for k, v in nsMap.items():
            namespace_manager.bind(k, v)
        self.inferredFacts.namespace_manager = namespace_manager
    else:
        self.inferredFacts = inferredTarget
    self.workingMemory = initialWorkingMemory and initialWorkingMemory or set()
    self.proofTracers = {}
    self.terminalNodes = set()
    self.instantiations = {}
    start = time.time()
    self.ruleStore = ruleStore
    self.justifications = {}
    self.dischargedBindings = {}
    if not dontFinalize:
        self.ruleStore._finalize()
    self.filteredFacts = Graph()

    # 'Universal truths' for a rule set are rules where the LHS is empty.
    # Rather than automatically adding them to the working set, alpha nodes
    # are 'notified' of them, so they can be checked for while performing
    # inter-element tests.
    self.universalTruths = []
    from FuXi.Horn.HornRules import Ruleset
    self.rules = set()
    self.negRules = set()
    for rule in Ruleset(n3Rules=self.ruleStore.rules, nsMapping=self.nsMap):
        import warnings
        warnings.warn(
            "Rules in a network should be built *after* construction via "
            "self.buildNetworkClause(HornFromN3(n3graph)) for instance",
            DeprecationWarning, 2)
        self.buildNetworkFromClause(rule)
    self.alphaNodes = [node for node in self.nodes.values()
                       if isinstance(node, AlphaNode)]
    self.alphaBuiltInNodes = [node for node in self.nodes.values()
                              if isinstance(node, BuiltInAlphaNode)]
    self._setupDefaultRules()
    if initialWorkingMemory:
        start = time.time()
        self.feedFactsToAdd(initialWorkingMemory)
        print("Time to calculate closure on working memory: %s ms"
              % ((time.time() - start) * 1000), file=sys.stderr)
    if graphVizOutFile:
        print("Writing out RETE network to", graphVizOutFile, file=sys.stderr)
        renderNetwork(self, nsMap=nsMap).write(graphVizOutFile)
def serialize(self, add, delete):
    commit = Namespace("urn:commit:" + str(uuid.uuid1()) + ":")
    eccrev = Namespace("https://vocab.eccenca.com/revision/")

    g = ConjunctiveGraph()
    namespace_manager = NamespaceManager(g)
    namespace_manager.bind('eccrev', eccrev, override=False)

    g.add((commit.term(""), RDF.type, eccrev.Commit))

    graphUris = set(delete.keys()) | set(add.keys())

    for graphUri in graphUris:
        if (graphUri in delete.keys() and len(delete[graphUri]) > 0) or (graphUri in add.keys() and len(add[graphUri]) > 0):
            revision = Namespace("urn:revision:" + str(uuid.uuid1()) + ":")
            g.add((commit.term(""), eccrev.hasRevision, revision.term("")))
            g.add((revision.term(""), RDF.type, eccrev.Revision))

            if str(graphUri) != 'http://quitdiff.default/':
                g.add((revision.term(""), eccrev.hasRevisionGraph, graphUri))

            if graphUri in delete.keys() and len(delete[graphUri]) > 0:
                deleteGraphName = revision.term(":delete")
                g.add((revision.term(""), eccrev.deltaDelete, deleteGraphName))
                for triple in delete[graphUri]:
                    g.add(triple + (deleteGraphName,))

            if graphUri in add.keys() and len(add[graphUri]) > 0:
                insertGraphName = revision.term(":insert")
                g.add((revision.term(""), eccrev.deltaInsert, insertGraphName))
                for triple in add[graphUri]:
                    g.add(triple + (insertGraphName,))

    return g.serialize(format="trig").decode("utf-8")
def load_graph_prefixes():
    namespace_manager = NamespaceManager(Graph())

    # restPrefix = Namespace('http://restaurants.recommender.es/od-data/restaurant/')
    # locPrefix = Namespace('http://restaurants.recommender.es/od-data/location/')
    # ratePrefix = Namespace('http://restaurants.recommender.es/od-data/rate/')
    # contPrefix = Namespace('http://restaurants.recommender.es/od-data/contact/')
    #
    # namespace_manager.bind('rest', restPrefix)
    # namespace_manager.bind('loc', locPrefix)
    # namespace_manager.bind('rate', ratePrefix)
    # namespace_manager.bind('cont', contPrefix)

    tree = ET.parse('metadata.xml')
    root = tree.getroot()
    prefixes = root.find("prefixes")

    for prefix in prefixes:
        namespace = Namespace(prefix.find('namespace').text)
        prefix_name = prefix.get('name')
        namespace_manager.bind(prefix_name, namespace)

    return namespace_manager
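# Shape of metadata.xml this function expects, inferred from the parsing code
# above (the element names are real, the example values are assumptions):
# <metadata>
#   <prefixes>
#     <prefix name="rest"><namespace>http://restaurants.recommender.es/od-data/restaurant/</namespace></prefix>
#     <prefix name="loc"><namespace>http://restaurants.recommender.es/od-data/location/</namespace></prefix>
#   </prefixes>
# </metadata>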
def _create_or_get_graph(self, name):
    if name not in self.models:
        graph = Graph()
        namespace_manager = NamespaceManager(Graph())
        namespace_manager.bind(DEFAULT_NAMESPACE[0], self.default_ns)
        graph.ns_manager = namespace_manager
        self.models[name] = graph
    return self.models[name]
def openDatabase(self):
    """ Open the configured database """
    self._init_rdf_graph()
    L.debug("opening " + str(self.source))
    self.source.open()
    nm = NamespaceManager(self['rdf.graph'])
    self['rdf.namespace_manager'] = nm
    self['rdf.graph'].namespace_manager = nm
    nm.bind("", self['rdf.namespace'])
def renderNetwork(network, nsMap={}):
    """
    Takes an instance of a compiled ReteNetwork and a namespace mapping (for
    constructing QNames for rule pattern terms) and returns a BGL Digraph
    instance representing the Rete network (from which GraphViz diagrams can
    be generated)
    """
    # from FuXi.Rete import BuiltInAlphaNode
    # from BetaNode import LEFT_MEMORY, RIGHT_MEMORY, LEFT_UNLINKING
    dot = Dot(graph_type='digraph')
    namespace_manager = NamespaceManager(Graph())
    for prefix, uri in list(nsMap.items()):
        namespace_manager.bind(prefix, uri, override=False)

    visitedNodes = {}
    edges = []
    idx = 0
    for node in list(network.nodes.values()):
        if node not in visitedNodes:
            idx += 1
            visitedNodes[node] = generateBGLNode(
                dot, node, namespace_manager, str(idx))
            dot.add_node(visitedNodes[node])
    nodeIdxs = {}
    for node in list(network.nodes.values()):
        for mem in node.descendentMemory:
            if not mem:
                continue
            bNode = mem.successor
        for bNode in node.descendentBetaNodes:
            for idx, otherNode in enumerate([bNode.leftNode, bNode.rightNode]):
                if node == otherNode and (node, otherNode) not in edges:
                    for i in [node, bNode]:
                        if i not in visitedNodes:
                            idx += 1
                            nodeIdxs[i] = idx
                            visitedNodes[i] = generateBGLNode(
                                dot, i, namespace_manager, str(idx))
                            dot.add_node(visitedNodes[i])
                    edge = Edge(visitedNodes[node],
                                visitedNodes[bNode],
                                label=idx == 0 and 'left' or 'right')
                    dot.add_edge(edge)
                    edges.append((node, bNode))
    return dot
def testExpand(self):
    EX = Namespace("http://example.com/")
    namespace_manager = NamespaceManager(Graph())
    namespace_manager.bind('ex', EX, override=False)
    self.testGraph.namespace_manager = namespace_manager

    man = Class(EX.Man)
    boy = Class(EX.Boy)
    woman = Class(EX.Woman)
    girl = Class(EX.Girl)
    male = Class(EX.Male)
    female = Class(EX.Female)
    human = Class(EX.Human)
    cat = Class(EX.Cat)
    dog = Class(EX.Dog)
    animal = Class(EX.Animal)

    animal = cat | dog | human
    human += man
    human += boy
    human += woman
    human += girl
    male += man
    male += boy
    female += woman
    female += girl

    testClass = human & ~female
    self.assertEqual(repr(testClass), 'ex:Human THAT ( NOT ex:Female )')
    newtestClass = ComplementExpansion(testClass, debug=True)
    self.assertTrue(repr(newtestClass) in ['( ex:Boy or ex:Man )',
                                           '( ex:Man or ex:Boy )'],
                    repr(newtestClass))

    testClass2 = animal & ~(male | female)
    self.assertEqual(
        repr(testClass2),
        '( ( ex:Cat or ex:Dog or ex:Human ) and ( not ( ex:Male or ex:Female ) ) )')
    newtestClass2 = ComplementExpansion(testClass2, debug=True)
    testClass2Repr = repr(newtestClass2)
    self.assertTrue(testClass2Repr in ['( ex:Cat or ex:Dog )',
                                       '( ex:Dog or ex:Cat )'],
                    testClass2Repr)
def init_database(self):
    """ Open the configured database """
    self._init_rdf_graph()
    L.debug("opening " + str(self.source))
    self.source.open()
    nm = NamespaceManager(self['rdf.graph'])
    self['rdf.namespace_manager'] = nm
    self['rdf.graph'].namespace_manager = nm

    # A runtime version number for the graph; it should update for all
    # changes to the graph
    self['rdf.graph.change_counter'] = 0

    self['rdf.graph']._add = self['rdf.graph'].add
    self['rdf.graph']._remove = self['rdf.graph'].remove
    self['rdf.graph'].add = self._my_graph_add
    self['rdf.graph'].remove = self._my_graph_remove
    nm.bind("", self['rdf.namespace'])
def dataset(self):
    if hasattr(self._connection, 'dataset'):
        return getattr(self._connection, 'dataset')
    if self.store == 'Sleepycat':
        dataset = Dataset(store=self.store, default_union=True)
        dataset.open(self.store_path, create=True)
    else:
        self.store = Virtuoso(self.connection)
        # dataset = Dataset(store=self.store, default_union=True)
        dataset = ConjunctiveGraph(store=self.store, identifier=CENDARI)
        self.store.connection  # force connection
    setattr(self._connection, 'dataset', dataset)
    nm = NamespaceManager(dataset)
    for prefix, ns in INIT_NS.items():  # iteritems() was removed in Python 3
        nm.bind(prefix, ns)
    dataset.namespace_manager = nm
    return dataset
def newgraph(request):
    print(request.method)

    # Create and bind namespaces
    namespace_manager = NamespaceManager(Graph())
    for ns in namespaces_dict:
        namespace_manager.bind(ns, Namespace(namespaces_dict[ns]))

    # Create a new graph
    graph = Graph()
    graph.namespace_manager = namespace_manager

    triple_list = []
    subject_list = []
    predicate_list = []
    subject_set = {}
    predicate_set = {}
    object_set = {}

    # Determine xml:base
    subject_base_test_set = {triple[0] for triple in triple_list}
    base_set = {subject[:subject.rfind("/")] for subject in subject_base_test_set}

    # If all subjects share the same substring-base, this substring-base is
    # likely to be the xml:base.
    if len(base_set) == 1:
        base = str(list(base_set)[0]) + "/"
    else:
        base = ""

    # Serialize graph
    rdfjson = graph.serialize(None, format="rdf-json")

    triple_fetcher_classes = get_triple_fetcher_classes()

    response = render_to_response('rdfedit/triples.html',
                                  {'rdfjson': rdfjson,
                                   'triple_list': triple_list,
                                   'subject_set': subject_set,
                                   'predicate_set': predicate_set,
                                   'object_set': object_set,
                                   'namespaces_dict': json.dumps(namespaces_dict),
                                   'base': base,
                                   'triple_fetcher_classes': triple_fetcher_classes},
                                  context_instance=RequestContext(request))

    return response
def tobj(objname):
    SCHEMA = Namespace('http://schema.org/')
    SPDX = Namespace('http://www.spdx.org/rdf/terms#')
    n = NamespaceManager(Graph())
    n.bind("schema", SCHEMA)
    n.bind("spdx", SPDX)

    c = get_db().cursor()
    c.execute('SELECT * FROM objects WHERE id=?', (objname,))
    obj = c.fetchone()

    g = Graph()
    g.namespace_manager = n

    objuri = URIRef("http://localhost:5000/b/" + obj[0])
    robjuri = URIRef("http://localhost:5000/r/" + obj[0])
    md5node = BNode()
    g.add((md5node, SPDX.checksumValue, Literal(obj[2])))
    g.add((md5node, SPDX.algorithm,
           URIRef("http://packages.qa.debian.org/#checksumAlgorithm_md5sum")))
    g.add((objuri, SPDX.checksum, md5node))
    g.add((objuri, SCHEMA.fileSize, Literal(obj[1])))

    return Response(g.serialize(format="turtle"), mimetype="text/plain")
def load_ontology():
    from FuXi.Horn.HornRules import HornFromN3
    from FuXi.Rete.Util import generateTokenSet
    from FuXi.Rete.RuleStore import SetupRuleStore
    rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    for rule in HornFromN3('rdfs-rules.n3'):
        network.buildNetworkFromClause(rule)
    # for rule in HornFromN3('owl-rules.n3'):
    #     network.buildNetworkFromClause(rule)
    g = Graph(identifier='http://catalyst-fp8.eu/ontology')
    npm = NamespaceManager(g)
    g.namespace_manager = npm
    for name in ('SIOC', 'OA', 'CATALYST', 'IDEA', 'IBIS', 'VOTE', 'VERSION',
                 'ASSEMBL', 'OWL', 'RDF', 'RDFS', 'XSD'):
        npm.bind(name.lower(), globals()[name])
    for f in ontology_files:
        g.parse(join(dirname(__file__), f), format='turtle')
    network.feedFactsToAdd(generateTokenSet(g))
    for n in network.inferredFacts.triples((None, None, None)):
        g.add(n)
    return g
def __init__(self, path=None):
    """
    If no path is passed, build the graph in memory.
    Otherwise, create a persistent graph on disk.
    """
    if path is not None:
        # Create persistent graph on disk
        self.path = path
        self.graph = ConjunctiveGraph('Sleepycat', URIRef("kb4it://"))
        graph_path = path + SEP + 'kb4it.graph'
        self.graph.store.open(graph_path)
    else:
        # Create graph in memory
        self.graph = ConjunctiveGraph('IOMemory')

    # Assign namespaces to the namespace manager of this graph
    namespace_manager = NamespaceManager(ConjunctiveGraph())
    for ns in NSBINDINGS:
        namespace_manager.bind(ns, NSBINDINGS[ns])
    self.graph.namespace_manager = namespace_manager
class Prologue:
    """
    A class for holding prefixing bindings and base URI information
    """

    def __init__(self):
        self.base = None
        self.namespace_manager = NamespaceManager(
            Graph())  # ns man needs a store

    def resolvePName(self, prefix, localname):
        ns = self.namespace_manager.store.namespace(prefix or "")
        if ns is None:
            raise Exception('Unknown namespace prefix : %s' % prefix)
        return URIRef(ns + (localname or ""))

    def bind(self, prefix, uri):
        self.namespace_manager.bind(prefix, uri, replace=True)

    def absolutize(self, iri):
        """
        Apply BASE / PREFIXes to URIs
        (and to datatypes in Literals)

        TODO: Move resolving URIs to pre-processing
        """
        if isinstance(iri, CompValue):
            if iri.name == 'pname':
                return self.resolvePName(iri.prefix, iri.localname)
            if iri.name == 'literal':
                return Literal(
                    iri.string, lang=iri.lang,
                    datatype=self.absolutize(iri.datatype))
        elif isinstance(iri, URIRef) and ':' not in iri:
            return URIRef(iri, base=self.base)
        return iri
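# Usage sketch for Prologue: bind a prefix, then resolve a pname; the 'ex'
# prefix and URI are assumptions for illustration (URIRef from rdflib).
prologue = Prologue()
prologue.bind("ex", URIRef("http://example.com/"))
prologue.resolvePName("ex", "Thing")  # -> URIRef("http://example.com/Thing")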
def main(labeled, wid_title_mapping, processed_out, discarded_out, dataset,
         format, resource_namespace, fact_namespace, ontology_namespace):
    # Namespace prefixes for RDF serialization
    RESOURCE_NS = Namespace(resource_namespace)
    FACT_EXTRACTION_NS = Namespace(fact_namespace)
    ONTOLOGY_NS = Namespace(ontology_namespace)
    NAMESPACE_MANAGER = NamespaceManager(Graph())
    NAMESPACE_MANAGER.bind('resource', RESOURCE_NS)
    NAMESPACE_MANAGER.bind('fact', FACT_EXTRACTION_NS)
    NAMESPACE_MANAGER.bind('ontology', ONTOLOGY_NS)

    mapping = json.load(wid_title_mapping)
    with codecs.open(labeled, 'rb', 'utf8') as f:
        labeled = json.load(f)

    processed, discarded = to_assertions(labeled, mapping, NAMESPACE_MANAGER,
                                         {
                                             'ontology': ONTOLOGY_NS,
                                             'resource': RESOURCE_NS,
                                             'fact_extraction': FACT_EXTRACTION_NS,
                                         },
                                         outfile=dataset,
                                         format=format)
    with codecs.open(processed_out, 'wb', 'utf8') as f:
        f.writelines('\n'.join(processed))
    with codecs.open(discarded_out, 'wb', 'utf8') as f:
        f.writelines('\n'.join(discarded))
def serialize(self, add, delete):
    diff = Namespace("http://topbraid.org/diff#")

    g = ConjunctiveGraph()
    namespace_manager = NamespaceManager(g)
    namespace_manager.bind('diff', diff, override=False)
    namespace_manager.bind('owl', OWL, override=False)

    graphUris = set(delete.keys()) | set(add.keys())

    for graphUri in graphUris:
        if (graphUri in delete.keys() and len(delete[graphUri]) > 0) or (graphUri in add.keys() and len(add[graphUri]) > 0):
            changeset = Namespace("urn:diff:" + str(uuid.uuid1()))
            graphTerm = changeset.term("")
            if str(graphUri) != 'http://quitdiff.default/':
                g.add((graphTerm, OWL.imports, graphUri, graphTerm))
            g.add((graphTerm, RDF.type, OWL.Ontology, graphTerm))
            g.add((graphTerm, OWL.imports, diff.term(""), graphTerm))

            if graphUri in delete.keys() and len(delete[graphUri]) > 0:
                i = 0
                for triple in delete[graphUri]:
                    deleteStatementName = BNode()
                    g.add((deleteStatementName, RDF.type, diff.DeletedTripleDiff, graphTerm))
                    g.add((deleteStatementName, RDF.subject, triple[0], graphTerm))
                    g.add((deleteStatementName, RDF.predicate, triple[1], graphTerm))
                    g.add((deleteStatementName, RDF.object, triple[2], graphTerm))
                    i += 1

            if graphUri in add.keys() and len(add[graphUri]) > 0:
                i = 0
                for triple in add[graphUri]:
                    insertGraphName = BNode()
                    g.add((insertGraphName, RDF.type, diff.AddedTripleDiff, graphTerm))
                    g.add((insertGraphName, RDF.subject, triple[0], graphTerm))
                    g.add((insertGraphName, RDF.predicate, triple[1], graphTerm))
                    g.add((insertGraphName, RDF.object, triple[2], graphTerm))
                    i += 1

    return g.serialize(format="trig").decode("utf-8")
class Prologue(object):
    """
    A class for holding prefixing bindings and base URI information
    """

    def __init__(self):
        self.base = None
        self.namespace_manager = NamespaceManager(
            Graph())  # ns man needs a store

    def resolvePName(self, prefix, localname):
        ns = self.namespace_manager.store.namespace(prefix or "")
        if ns is None:
            raise Exception("Unknown namespace prefix : %s" % prefix)
        return URIRef(ns + (localname or ""))

    def bind(self, prefix, uri):
        self.namespace_manager.bind(prefix, uri, replace=True)

    def absolutize(self, iri):
        """
        Apply BASE / PREFIXes to URIs
        (and to datatypes in Literals)

        TODO: Move resolving URIs to pre-processing
        """
        if isinstance(iri, CompValue):
            if iri.name == "pname":
                return self.resolvePName(iri.prefix, iri.localname)
            if iri.name == "literal":
                return Literal(iri.string, lang=iri.lang,
                               datatype=self.absolutize(iri.datatype))
        elif isinstance(iri, URIRef) and ":" not in iri:
            return URIRef(iri, base=self.base)
        return iri
class QNameManager(object):

    def __init__(self, nsDict=None):
        self.nsDict = nsDict and nsDict or {}
        self.nsMgr = NamespaceManager(Graph())
        self.nsMgr.bind('owl', 'http://www.w3.org/2002/07/owl#')
        self.nsMgr.bind('math', 'http://www.w3.org/2000/10/swap/math#')

    def bind(self, prefix, namespace):
        self.nsMgr.bind(prefix, namespace)
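# Usage sketch: QNameManager pre-binds 'owl' and 'math'; further prefixes go
# through bind(). Assumes rdflib's URIRef is imported; the result shown is
# what compute_qname yields for a bound namespace.
qm = QNameManager()
qm.bind('ex', 'http://example.com/')
qm.nsMgr.compute_qname(URIRef('http://www.w3.org/2002/07/owl#Class'))
# -> ('owl', URIRef('http://www.w3.org/2002/07/owl#'), 'Class')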
def get_graph():
    namespace_manager = NamespaceManager(Graph())
    namespace_manager.bind('owl', OWL_NS, override=False)
    namespace_manager.bind('swo', swoNs, override=False)
    namespace_manager.bind('cas', casNs, override=False)
    namespace_manager.bind('dcat', dcatNs, override=False)
    g = Graph()
    g.namespace_manager = namespace_manager
    return g
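# Prefixes bound on the manager show up in serializations of the graph; a
# minimal sketch (assumes rdflib's RDF namespace is imported alongside the
# module-level swoNs / OWL_NS used above; serialize() returns bytes on older
# rdflib versions):
g = get_graph()
g.add((swoNs['Example'], RDF.type, OWL_NS['Class']))
print(g.serialize(format='turtle'))  # output begins with @prefix swo:, owl:, ...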
def write_to_turtle(news_results, country_str):
    g = Graph()
    namespace_manager = NamespaceManager(Graph())
    n_dbpedia_res = Namespace("http://dbpedia.org/resource/")
    n_custom_ontology = Namespace("http://www.semanticweb.org/sws/group4/ontology/")
    n_custom_resources = Namespace("http://www.semanticweb.org/sws/group4/resources/")
    namespace_manager.bind('dbp', n_dbpedia_res, override=False)
    namespace_manager.bind('swo', n_custom_ontology, override=False)
    namespace_manager.bind('sws', n_custom_resources, override=False)
    g.namespace_manager = namespace_manager

    country = n_dbpedia_res[country_str.replace(" ", "_")]

    for news_entity in news_results:
        # article = BNode()
        hash_input = news_entity['url'] + news_entity['title'] + news_entity['publication_date']
        # generate a 15-digit hash for the name
        news_id = int(hashlib.sha256(hash_input.encode('utf-8')).hexdigest(), 16) % 10**15
        article = URIRef(n_custom_resources['Article-' + str(news_id)])
        g.add((article, RDF.type, n_custom_ontology['NewsArticle']))

        # data properties
        g.add((article, n_custom_ontology['origin'], Literal(news_entity['host'])))
        g.add((article, n_custom_ontology['sourceUrl'], Literal(news_entity['url'])))
        g.add((article, n_custom_ontology['publicationDate'],
               Literal(news_entity['publication_date'], datatype=XSD.dateTime)))
        g.add((article, n_custom_ontology['title'], Literal(news_entity['title'])))
        if news_entity['sentiment']:
            g.add((article, n_custom_ontology['sentiment'], Literal(news_entity['sentiment'])))

        # object properties
        g.add((article, n_custom_ontology['mentionsCountry'], country))

        # related resources
        for related_res in news_entity['related_res']:
            # rel = BNode()
            # generate a 15-digit hash for the news article mention
            s = str(related_res[1]) + str(related_res[0]) + str(news_id)
            mention_id = int(hashlib.sha256(s.encode('utf-8')).hexdigest(), 16) % 10**15
            rel = URIRef(n_custom_resources['ArticleMention-' + str(mention_id)])
            g.add((rel, RDF.type, n_custom_ontology['ArticleMention']))
            g.add((rel, n_custom_ontology['relevance'], Literal(related_res[1])))
            g.add((rel, n_custom_ontology['mentionsResource'], URIRef(related_res[0])))
            g.add((article, n_custom_ontology['hasMention'], rel))

    # write to output file
    g.serialize(destination=f'ttl/{country_str.replace(" ", "_")}.ttl', format='turtle')
def _configure_namespaces(self):
    """
    Loads all the registered namespaces from the configuration file at
    `./config.json` and registers the namespaces and their prefixes in
    the Graph.

    :rtype: None
    """
    self.ns = {}
    namespaces = dcat_config['namespaces']
    for prefix, namespace in namespaces.items():  # iteritems() was removed in Python 3
        self.ns[prefix] = Namespace(namespace)

    ns_manager = NamespaceManager(self.graph)
    for prefix, namespace in self.ns.items():
        ns_manager.bind(prefix.lower(), namespace, override=True)

    self.graph.namespace_manager = ns_manager
class NIFPrefixes:

    def __init__(self):
        self.manager = NamespaceManager(Graph())
        self.manager.bind("xsd", XSD)
        self.manager.bind("itsrdf", ITSRDF)
        self.manager.bind("nif", NIF)
        self._XSD = '@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n'
        self._ITSRDF = '@prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> .\n'
        self._NIF = '@prefix nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> .\n'

    @property
    def turtle(self):
        return self._XSD + self._ITSRDF + self._NIF
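# Usage sketch: the turtle property yields a ready-made prefix header that can
# be prepended to hand-built Turtle; the triple below is illustrative only.
prefixes = NIFPrefixes()
document = prefixes.turtle + 'nif:ctx a nif:Context .\n'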
def deflate(self, model_object, props, rels):
    """Overrides the `BaseSerializer` method to add graph logic."""
    namespace_manager = NamespaceManager(self.g)
    namespace_manager.bind(Ori.prefix, Namespace(Ori.uri), override=False)
    namespace_manager.bind(model_object.prefix, Namespace(model_object.uri), override=False)

    s = URIRef('{}{}'.format(Ori.uri, model_object.get_ori_identifier()))
    p = URIRef('{}type'.format(Rdf.uri))
    o = URIRef(self.uri_format(model_object))
    self.g.add((s, p, o))

    for name, definition in model_object.definitions(props=props, rels=rels):
        value = model_object.values.get(name, None)
        if value:
            p = URIRef(self.uri_format(definition))
            try:
                o = self.serialize_prop(definition, value)
            except MissingProperty:
                raise

            namespace_manager.bind(definition.ns.prefix, Namespace(definition.ns.uri), override=False)

            if type(o) != list:
                self.g.add((s, p, o))
            else:
                for oo in o:
                    self.g.add((s, p, oo))
        elif definition.required and not model_object.skip_validation:
            raise RequiredProperty("Property '{}' is required for {}".format(
                name, model_object.compact_uri()))
def create_molecule_graph(uri_base, mol):
    mongochem = Namespace('%s/api/v1/molecules/' % uri_base)

    g = Graph()
    inchi = mol['inchi']
    name = mol['name']
    inchi_node = BNode()
    molecule = URIRef(mongochem[mol['_id']])

    namespace_manager = NamespaceManager(g)
    namespace_manager.bind('cheminf', cheminf, override=False)
    namespace_manager.bind('mongochem', mongochem, override=False)
    namespace_manager.bind('owl', OWL, override=False)

    g.add((molecule, OWL.subClassOf, cheminf.CHEMINF_000000))
    g.add((molecule, OWL.label, Literal(name.lower())))
    g.add((inchi_node, RDF.type, cheminf.CHEMINF_000113))
    g.add((inchi_node, cheminf.SIO_000300, Literal(inchi)))
    g.add((molecule, cheminf.CHEMINF_000200, inchi_node))

    return g.serialize()
def write_towns_to_turtle(towns, countryTowns):
    g = Graph()
    namespace_manager = NamespaceManager(Graph())
    n_dbpedia_res = Namespace("http://dbpedia.org/resource/")
    n_dbo_res = Namespace("http://dbpedia.org/ontology/")
    n_custom_ontology = Namespace("http://www.semanticweb.org/sws/group4/ontology/")
    # n_custom_resources = Namespace("http://www.semanticweb.org/sws/group4/resources/")
    namespace_manager.bind('swo', n_custom_ontology, override=False)
    # namespace_manager.bind('sws', n_custom_resources, override=False)
    namespace_manager.bind('dbp', n_dbpedia_res, override=False)
    namespace_manager.bind('dbo', n_dbo_res, override=False)
    g.namespace_manager = namespace_manager

    for iso, name, lat, lon, templ, temph, df, dt, mf, mt in towns:
        t = URIRef(n_dbpedia_res[name.replace(' ', '_')])

        # add the town as a named individual
        g.add((t, RDF.type, n_dbo_res.Town))

        # add data properties
        g.add((t, n_custom_ontology['townName'], Literal(name)))
        g.add((t, n_custom_ontology['latitude'], Literal(lat)))
        g.add((t, n_custom_ontology['longitude'], Literal(lon)))
        g.add((t, n_custom_ontology['tempTypicalLow'], Literal(templ)))
        g.add((t, n_custom_ontology['tempTypicalHigh'], Literal(temph)))
        g.add((t, n_custom_ontology['dayFrom'], Literal(df)))
        g.add((t, n_custom_ontology['dayTo'], Literal(dt)))
        g.add((t, n_custom_ontology['monthFrom'], Literal(mf)))
        g.add((t, n_custom_ontology['monthTo'], Literal(mt)))

    for country, town in countryTowns:
        if country is not None and town is not None:
            c = URIRef(n_dbpedia_res[country.replace(" ", "_")])
            t = URIRef(n_dbpedia_res[town.replace(" ", "_")])
            g.add((c, n_custom_ontology['hasTown'], t))

    # write to output file
    g.serialize(destination='ttl/towns.ttl', format='turtle')
def write_to_turtle_rdf(df, output_file):
    g = Graph()
    namespace_manager = NamespaceManager(Graph())
    n_geo = Namespace("http://sws.geonames.org/")
    n_custom_ont = Namespace(
        "http://vocab.informatik.tuwien.ac.at/VU184.729-2018/e01429253/ontology/")
    n_custom_cls = Namespace(
        "http://vocab.informatik.tuwien.ac.at/VU184.729-2018/e01429253/class/")
    n_time = Namespace("http://www.w3.org/2006/time/")
    namespace_manager.bind('tuwo', n_custom_ont, override=False)
    namespace_manager.bind('tuwc', n_custom_cls, override=False)
    namespace_manager.bind('gn', n_geo, override=False)
    namespace_manager.bind('time', n_time, override=False)
    g.namespace_manager = namespace_manager

    # define properties
    movement_property = n_custom_ont['populationMovement']
    orig_country_property = n_custom_ont['countryOfOrigin']
    pop_type_property = n_custom_ont['populationType']
    year_property = n_time['year']
    pop_amount_property = n_custom_ont['peopleAmount']

    # add nodes to the graph
    for index, row in df.iterrows():
        # blank node for the connection
        relation_node = BNode()  # a GUID is generated

        # base triple (residence_country, movement, blank_node)
        g.add((n_geo[str(int(row[0]))], movement_property, relation_node))

        # child properties
        g.add((relation_node, orig_country_property, n_geo[str(int(row[1]))]))
        g.add((relation_node, pop_type_property, n_custom_cls[row[2]]))
        g.add((relation_node, year_property, Literal(int(row[3]))))
        g.add((relation_node, pop_amount_property, Literal(int(row[4]))))

    # write to output file
    g.serialize(destination=output_file, format='turtle')
def generate_authority_rdf(authority):
    g = Graph()
    auth = URIRef("http://data.isiscb.org/authority/" + authority.id)
    # urllib.quote(authority.name.replace(" ", "_"))

    type = get_auth_type(authority.type_controlled)
    if not type:
        return ''

    g.add((auth, RDF.type, type))
    g.add((auth, RDF.type, madsrdf.Authority))
    g.add((auth, RDFS.label, Literal(authority.name)))
    g.add((auth, madsrdf.authoritativeLabel, Literal(authority.name)))

    for attr in authority.attributes.all():
        attr_pred = get_property(attr.type_controlled.name)
        if attr_pred:
            g.add((auth, attr_pred, Literal(attr.value_freeform)))

    nsMgr = NamespaceManager(g)
    nsMgr.bind('madsrdf', madsrdf)
    nsMgr.bind('isiscb', isisns)
    nsMgr.bind('isisvocab', isisns_props)
    return g.serialize(format='application/rdf+xml')
def create_molecule_graph(uri_base, mol):
    mongochem = Namespace('%s/api/v1/molecules/' % uri_base)

    g = Graph()
    inchi = mol['inchi']
    name = mol.get('name')
    inchi_node = BNode()
    molecule = URIRef(mongochem[mol['_id']])

    namespace_manager = NamespaceManager(g)
    namespace_manager.bind('cheminf', cheminf, override=False)
    namespace_manager.bind('mongochem', mongochem, override=False)
    namespace_manager.bind('owl', OWL, override=False)

    g.add((molecule, OWL.subClassOf, cheminf.CHEMINF_000000))
    if name is not None:
        g.add((molecule, OWL.label, Literal(name.lower())))
    g.add((inchi_node, RDF.type, cheminf.CHEMINF_000113))
    g.add((inchi_node, cheminf.SIO_000300, Literal(inchi)))
    g.add((molecule, cheminf.CHEMINF_000200, inchi_node))

    return g.serialize()
SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')
# RDF namespace
RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
# CiTO namespace
CITO = Namespace('http://purl.org/spar/cito/')
# RDFS namespace
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')
# The local namespace
VLOCAL = Namespace('http://connect.unavco.org/ontology/vlocal#')
# WGS84 namespace
WGS84 = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')
# OWL namespace
OWL = Namespace('http://www.w3.org/2002/07/owl#')
VITROPUBLIC = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/public#')

ns_manager = NamespaceManager(Graph())
ns_manager.bind('d', D)
ns_manager.bind('vivo', VIVO)
ns_manager.bind('vcard', VCARD)
ns_manager.bind('obo', OBO)
ns_manager.bind('bibo', BIBO)
ns_manager.bind("foaf", FOAF)
ns_manager.bind("skos", SKOS)
ns_manager.bind("cito", CITO)
ns_manager.bind("rdfs", RDFS)
ns_manager.bind("vlocal", VLOCAL)
ns_manager.bind("wgs84", WGS84)
ns_manager.bind("vitropublic", VITROPUBLIC)
ns_manager.bind("owl", OWL)
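# A module-level manager like this is typically attached to each new graph so
# every serialization shares the same prefixes; a minimal sketch (the triple
# is illustrative, and serialize() returns bytes on older rdflib versions):
g = Graph()
g.namespace_manager = ns_manager
g.add((VLOCAL['thing'], RDF['type'], OWL['Thing']))
print(g.serialize(format='turtle'))  # emits @prefix vlocal:, owl:, ...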
class ClaimsKGGenerator:

    def __init__(self, model_uri, sparql_wrapper=None, threshold=0.3,
                 include_body: bool = False, resolve: bool = True,
                 use_caching: bool = False):
        self._graph = rdflib.Graph()

        self.thesoz = SkosThesaurusMatcher(
            self._graph,
            thesaurus_path="claimskg/data/thesoz-komplett.xml",
            skos_xl_labels=True,
            prefix="http://lod.gesis.org/thesoz/")
        self._graph = self.thesoz.get_merged_graph()

        self.unesco = SkosThesaurusMatcher(
            self._graph,
            thesaurus_path="claimskg/data/unesco-thesaurus.xml",
            skos_xl_labels=False,
            prefix="http://vocabularies.unesco.org/thesaurus/")
        self._graph = self.unesco.get_merged_graph()

        self._graph.load("claimskg/data/dbpedia_categories_lang_en_skos.ttl",
                         format="turtle")

        self._sparql_wrapper = sparql_wrapper  # type: SPARQLWrapper
        self._uri_generator = ClaimsKGURIGenerator(model_uri)
        self._threshold = threshold
        self._include_body = include_body
        self._resolve = resolve
        self._use_caching = use_caching

        self.model_uri = model_uri
        self._namespace_manager = NamespaceManager(Graph())

        self._claimskg_prefix = rdflib.Namespace(model_uri)
        self._namespace_manager.bind('claimskg', self._claimskg_prefix, override=False)
        self._namespace_manager.bind('base', self._claimskg_prefix, override=True)

        self.counter = TypedCounter()

        self._rdfs_prefix = rdflib.Namespace("http://www.w3.org/2000/01/rdf-schema#")
        self._namespace_manager.bind('rdfs', self._rdfs_prefix, override=False)

        self._schema_prefix = rdflib.Namespace("http://schema.org/")
        self._namespace_manager.bind('schema', self._schema_prefix, override=False)

        self._namespace_manager.bind('owl', OWL, override=True)

        self._dbo_prefix = rdflib.Namespace("http://dbpedia.org/ontology/")
        self._namespace_manager.bind("dbo", self._dbo_prefix, override=False)

        self._dbr_prefix = rdflib.Namespace("http://dbpedia.org/resource/")
        self._namespace_manager.bind("dbr", self._dbr_prefix, override=False)

        self._dbc_prefix = rdflib.Namespace("http://dbpedia.org/resource/Category_")
        self._namespace_manager.bind("dbc", self._dbr_prefix, override=False)

        self._dcat_prefix = rdflib.Namespace("http://www.w3.org/ns/dcat#")
        self._namespace_manager.bind("dcat", self._dcat_prefix, override=False)

        self._dct_prefix = rdflib.Namespace("http://purl.org/dc/terms/")
        self._namespace_manager.bind("dct", self._dct_prefix, override=False)

        self._foaf_prefix = rdflib.Namespace("http://xmlns.com/foaf/0.1/")
        self._namespace_manager.bind("foaf", self._foaf_prefix, override=False)

        self._vcard_prefix = rdflib.Namespace("http://www.w3.org/2006/vcard/ns#")
        self._namespace_manager.bind("vcard", self._vcard_prefix, override=False)

        self._adms_prefix = Namespace("http://www.w3.org/ns/adms#")
        self._namespace_manager.bind("adms", self._adms_prefix, override=False)

        self._skos_prefix = Namespace("http://www.w3.org/2004/02/skos/core#")
        self._namespace_manager.bind("skos", self._skos_prefix, override=False)

        self._owl_same_as = URIRef(OWL['sameAs'])

        self._schema_claim_review_class_uri = URIRef(self._schema_prefix['ClaimReview'])
        self._schema_creative_work_class_uri = URIRef(self._schema_prefix['CreativeWork'])
        self._schema_organization_class_uri = URIRef(self._schema_prefix['Organization'])
        self._schema_thing_class_uri = URIRef(self._schema_prefix['Thing'])
        self._schema_rating_class_uri = URIRef(self._schema_prefix['Rating'])
        self._schema_language_class_uri = URIRef(self._schema_prefix['Language'])

        self._schema_claim_reviewed_property_uri = URIRef(self._schema_prefix['claimReviewed'])
        self._schema_url_property_uri = URIRef(self._schema_prefix['url'])
        self._schema_name_property_uri = URIRef(self._schema_prefix['name'])
        self._schema_date_published_property_uri = URIRef(self._schema_prefix['datePublished'])
        self._schema_in_language_preperty_uri = URIRef(self._schema_prefix['inLanguage'])
        self._schema_author_property_uri = URIRef(self._schema_prefix['author'])
        self._schema_same_as_property_uri = URIRef(self._schema_prefix['sameAs'])
        self._schema_citation_preperty_uri = URIRef(self._schema_prefix['citation'])
        self._schema_item_reviewed_property_uri = URIRef(self._schema_prefix['itemReviewed'])
        self._schema_alternate_name_property_uri = URIRef(self._schema_prefix['alternateName'])
        self._schema_description_property_uri = URIRef(self._schema_prefix['description'])
        self._schema_rating_value_property_uri = URIRef(self._schema_prefix['ratingValue'])
        self._schema_mentions_property_uri = URIRef(self._schema_prefix['mentions'])
        self._schema_keywords_property_uri = URIRef(self._schema_prefix['keywords'])
        self._schema_headline_property_uri = URIRef(self._schema_prefix['headline'])
        self._schema_review_body_property_uri = URIRef(self._schema_prefix['reviewBody'])
        self._schema_text_property_uri = URIRef(self._schema_prefix['text'])

        self._iso1_language_tag = "en"
        self._iso3_language_tag = "eng"

        self._english_uri = URIRef(self._claimskg_prefix["language/English"])
        self._graph.add((self._english_uri, RDF.type, self._schema_language_class_uri))
        self._graph.add((self._english_uri, self._schema_alternate_name_property_uri,
                         Literal(self._iso1_language_tag)))
        self._graph.add((self._english_uri, self._schema_name_property_uri,
                         Literal("English")))

        self._nif_prefix = rdflib.Namespace(
            "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#")
        self._namespace_manager.bind('nif', self._nif_prefix, override=False)
        self._nif_RFC5147String_class_uri = URIRef(self._nif_prefix['RFC5147String'])
        self._nif_context_class_uri = URIRef(self._nif_prefix['Context'])
        self._nif_source_url_property_uri = URIRef(self._nif_prefix['sourceUrl'])
        self._nif_begin_index_property_uri = URIRef(self._nif_prefix["beginIndex"])
        self._nif_end_index_property_uri = URIRef(self._nif_prefix["endIndex"])
        self._nif_is_string_property_uri = URIRef(self._nif_prefix["isString"])

        self._its_prefix = rdflib.Namespace("https://www.w3.org/2005/11/its/rdf#")
        self._namespace_manager.bind('itsrdf', self._its_prefix, override=False)
        self.its_ta_confidence_property_uri = URIRef(self._its_prefix['taConfidence'])
        self.its_ta_ident_ref_property_uri = URIRef(self._its_prefix['taIdentRef'])

        self._logical_view_claims = []  # type: List[ClaimLogicalView]
        self._creative_works_index = []

        self.keyword_uri_set = set()

        self.global_statistics = ClaimsKGStatistics()
        self.per_source_statistics = {}

    def _create_schema_claim_review(self, row, claim: ClaimLogicalView):
        claim_review_instance = self._uri_generator.claim_review_uri(row)
        self._graph.add((claim_review_instance, RDF.type,
                         self._schema_claim_review_class_uri))

        # claim_reviewed_value = _normalize_text_fragment(_row_string_value(row, "claimReview_claimReviewed"))
        # self._graph.add(
        #     (claim_review_instance, self._schema_claim_reviewed_property_uri,
        #      Literal(claim_reviewed_value, lang=self._iso1_language_tag)))

        headline_value = _row_string_value(row, "extra_title")
        if len(headline_value) > 0:
            self._graph.add((claim_review_instance, self._schema_headline_property_uri,
                             Literal(headline_value, lang=self._iso1_language_tag)))
            claim.text_fragments.append(headline_value)
            claim.has_headline = True

        # Include body only if the option is enabled
        body_value = _row_string_value(row, "extra_body")
        if len(body_value) > 0:
            claim.has_body_text = True
            claim.text_fragments.append(_normalize_text_fragment(body_value))
            if self._include_body:
                self._graph.add((claim_review_instance,
                                 self._schema_review_body_property_uri,
                                 Literal(body_value, lang=self._iso1_language_tag)))

        claim_review_url = row['claimReview_url']
        claim.claim_review_url = claim_review_url
        if claim_review_url is not None:
            self._graph.add((claim_review_instance, self._schema_url_property_uri,
                             URIRef(row['claimReview_url'])))

        review_date = row['claimReview_datePublished']
        if review_date:
            self._graph.add((claim_review_instance,
                             self._schema_date_published_property_uri,
                             Literal(review_date, datatype=XSD.date)))
            claim.review_date = datetime.datetime.strptime(review_date, "%Y-%m-%d").date()

        self._graph.add((claim_review_instance, self._schema_in_language_preperty_uri,
                         self._english_uri))

        return claim_review_instance

    def _create_organization(self, row, claim):
        organization = self._uri_generator.organization_uri(row)
        self._graph.add((organization, RDF.type, self._schema_organization_class_uri))

        claim.claimreview_author = row['claimReview_author_name']
        self._graph.add((organization, self._schema_name_property_uri,
                         Literal(row['claimReview_author_name'],
                                 lang=self._iso1_language_tag)))

        author_name = _row_string_value(row, 'claimReview_author_name')
        if len(author_name) > 0:
            self._graph.add((organization, self._schema_url_property_uri,
                             URIRef(source_uri_dict[author_name])))

        return organization

    def _create_claims_kg_organization(self):
        organization = self._uri_generator.claimskg_organization_uri()
        self._graph.add((organization, RDF.type, self._schema_organization_class_uri))
        self._graph.add((organization, self._schema_name_property_uri,
                         Literal("ClaimsKG")))
        self._graph.add((organization, self._schema_url_property_uri,
                         URIRef(self.model_uri)))

    def _reconcile_keyword_annotations(self, claim, keyword_uri, keyword,
                                       matching_annotations, type="thesoz"):
        for annotation in matching_annotations:
            self._graph.add((keyword_uri, URIRef(self._dct_prefix["about"]),
                             URIRef(annotation[0])))
            if type == "thesoz":
                claim.keywords_thesoz.add(keyword)
            else:
                claim.keywords_unesco.add(keyword)

    def _reconcile_keyword_mention_with_annotations(self, claim, mention,
                                                    dbpedia_entity, keyword,
                                                    matching_annotations,
                                                    type="thesoz"):
        start = mention['begin']
        end = mention['end']
        for matching_annotation in matching_annotations:
            if start == matching_annotation[2] and end == matching_annotation[3]:
                if type == "thesoz":
                    claim.keywords_thesoz_dbpedia.add(keyword)
                elif type == "unesco":
                    claim.keywords_unesco_dbpedia.add(keyword)
                self._graph.add((URIRef(dbpedia_entity), OWL.sameAs,
                                 URIRef(matching_annotation[0])))

    def _create_creative_work(self, row, claim: ClaimLogicalView):
        creative_work = self._uri_generator.creative_work_uri(row)
        self._graph.add((creative_work, RDF.type, self._schema_creative_work_class_uri))

        date_published_value = _row_string_value(row, "creativeWork_datePublished")
        if len(date_published_value) > 0:
            self._graph.add((creative_work, self._schema_date_published_property_uri,
                             Literal(date_published_value, datatype=XSD.date)))
            claim.claim_date = datetime.datetime.strptime(date_published_value,
                                                          "%Y-%m-%d").date()

        keywords = row['extra_tags']
        if isinstance(keywords, str) and len(keywords) > 0:
            keyword_mentions = self._process_json(row['extra_entities_keywords'])
            if not keyword_mentions:
                keyword_mentions = []
            if ";" in keywords:
                keyword_list = keywords.split(";")
            else:
                keyword_list = keywords.split(",")
            for keyword in keyword_list:
                keyword = keyword.strip()
                keyword_uri = self._uri_generator.keyword_uri(keyword)
                if keyword_uri not in self.keyword_uri_set:
                    self._graph.add((keyword_uri, RDF.type, self._schema_thing_class_uri))
                    self._graph.add((keyword_uri, self._schema_name_property_uri,
                                     Literal(keyword, lang=self._iso1_language_tag)))
                    thesoz_matching_annotations = self.thesoz.find_keyword_matches(keyword)
                    unesco_matching_annotations = self.unesco.find_keyword_matches(keyword)
                    self._reconcile_keyword_annotations(claim, keyword_uri, keyword,
                                                        thesoz_matching_annotations)
                    self._reconcile_keyword_annotations(claim, keyword_uri, keyword,
                                                        unesco_matching_annotations,
                                                        type="unesco")
                    for mention in keyword_mentions:
                        if keyword.lower().strip() in mention['text'].lower().strip():
                            self.keyword_uri_set.add(keyword_uri)
                            mention_instance, dbpedia_entity = self._create_mention(
                                mention, claim, False)
                            if mention_instance:
                                claim.keywords_dbpedia.add(keyword)
                                self._graph.add((keyword_uri,
                                                 self._schema_mentions_property_uri,
                                                 mention_instance))
                                self._reconcile_keyword_mention_with_annotations(
                                    claim, mention, dbpedia_entity, keyword,
                                    thesoz_matching_annotations)
                                self._reconcile_keyword_mention_with_annotations(
                                    claim, mention, dbpedia_entity, keyword,
                                    unesco_matching_annotations, type="unesco")
                claim.keywords.add(keyword.strip())
                self._graph.add((creative_work, self._schema_keywords_property_uri,
                                 keyword_uri))

        links = row['extra_refered_links']
        author_url = _row_string_value(row, 'claimReview_author_url')
        if links:
            links = links[1:-1].split(",")
            for link in links:
                stripped_link = link.strip()
                if len(stripped_link) > 0 and stripped_link[0] != "#" \
                        and re.match(_is_valid_url_regex, link.strip()) \
                        and link.strip() != source_uri_dict[author_url]:
                    link = link.strip().replace("\\", "").replace(
                        "%20TARGET=prayer>adultery</A>%20was%20made%20public.%20</p>%0A",
                        "").replace("\"", "").replace("<img%20src=?", "").replace(
                        ">", "").replace("</", "").replace("<", "")
                    parsed_url = urlparse(link)
                    is_correct = (all([parsed_url.scheme, parsed_url.netloc, parsed_url.path])
                                  and len(parsed_url.netloc.split(".")) > 1
                                  and "<img" not in link)
                    if is_correct:
                        claim.links.append(link)
                        # try:
                        self._graph.add((creative_work, self._schema_citation_preperty_uri,
                                         URIRef(parsed_url.scheme + "://" + parsed_url.netloc
                                                + parsed_url.path + "?"
                                                + parsed_url.query.replace("|", "%7C")
                                                .replace("^", "%5E").replace("\\", "%5C")
                                                .replace("{", "%7B").replace("}", "%7D")
                                                .replace("&", "%26").replace("=", "%3D"))))
                        # except:
                        #     pass

        # Creative work author instantiation
        author_value = _row_string_value(row, "creativeWork_author_name")
        claim.creative_work_author = author_value

        claim_reviewed_value = _normalize_text_fragment(
            _row_string_value(row, "claimReview_claimReviewed"))
        claim.title = claim_reviewed_value
        self._graph.add((creative_work, self._schema_text_property_uri,
                         Literal(claim_reviewed_value, lang=self._iso1_language_tag)))

        if len(author_value) > 0:
            creative_work_author = self._uri_generator.creative_work_author_uri(row)
            self._graph.add((creative_work_author, RDF.type, self._schema_thing_class_uri))

            author_mentions = self._process_json(row['extra_entities_author'])
            if not author_mentions:
                author_mentions = []

            for mention in author_mentions:
                entity_uri = mention['entity'].replace(" ", "_")
                mention_instance = self._dbr_prefix[entity_uri]
                if mention_instance:
                    self._graph.add((creative_work_author,
                                     self._schema_mentions_property_uri,
                                     mention_instance))

            self._graph.add((creative_work_author, self._schema_name_property_uri,
                             Literal(author_value, lang=self._iso1_language_tag)))
            self._graph.add((creative_work, self._schema_author_property_uri,
                             creative_work_author))

            # TODO: Reconcile author entities with DBpedia
            # self._graph.add((creative_work_author, self._schema_same_as_property_uri,
            #                  Literal("dbpedia:link")))

        self._creative_works_index.append(creative_work)
        return creative_work

    def _create_review_rating(self, row, claim):
        original_rating = self._uri_generator.create_original_rating_uri(row)

        rating_alternate_name = row['rating_alternateName']
        if rating_alternate_name:
            escaped_alternate_rating_name = html.escape(
                row['rating_alternateName']).encode('ascii', 'xmlcharrefreplace')
            self._graph.add((original_rating, self._schema_alternate_name_property_uri,
                             Literal(escaped_alternate_rating_name)))

        self._graph.add((original_rating, RDF.type, self._schema_rating_class_uri))

        rating_value = row['rating_ratingValue'].replace("[", "").replace("]", "") \
            .replace("'", "").replace(",", "").strip()
        if rating_value and len(rating_value) > 0:
            value = float(rating_value)
            self._graph.add((original_rating, self._schema_rating_value_property_uri,
                             Literal(value, datatype=XSD.float)))

        organization = self._uri_generator.organization_uri(row)
        self._graph.add((original_rating, self._schema_author_property_uri, organization))

        normalized_rating_enum = ratings.normalize(
            _row_string_value(row, "claimReview_author_name").lower(),
            _row_string_value(row, "rating_alternateName").lower())
        claim.normalized_rating = normalized_rating_enum.name
        normalized_rating = self._uri_generator.create_normalized_rating_uri(
            normalized_rating_enum)
        self._graph.add((normalized_rating, RDF.type, self._schema_rating_class_uri))
        self._graph.add((normalized_rating, self._schema_alternate_name_property_uri,
                         Literal(str(normalized_rating_enum.name),
                                 lang=self._iso1_language_tag)))
        self._graph.add((normalized_rating, self._schema_rating_value_property_uri,
                         Literal(normalized_rating_enum.value, datatype=XSD.integer)))

        claimskg_org = self._uri_generator.claimskg_organization_uri()
        self._graph.add((normalized_rating, self._schema_author_property_uri,
                         claimskg_org))

        return original_rating, normalized_rating

    def _create_mention(self, mention_entry, claim: ClaimLogicalView, in_review):
        rho_value = float(mention_entry['score'])
        if rho_value > self._threshold:
            text = mention_entry['text']
            start = mention_entry['begin']
            end = mention_entry['end']
            entity_uri = mention_entry['entity'].replace(" ", "_")
            categories = mention_entry['categories']
            if len(categories) > 0:
                categories = categories[0].split(",")

            mention = self._uri_generator.mention_uri(
                start, end, text, entity_uri, rho_value,
                ",".join(claim.text_fragments))

            self._graph.add((mention, RDF.type, self._nif_context_class_uri))
            self._graph.add((mention, RDF.type, self._nif_RFC5147String_class_uri))
            self._graph.add((mention, self._nif_is_string_property_uri,
                             Literal(text, lang=self._iso1_language_tag)))
            self._graph.add((mention, self._nif_begin_index_property_uri,
                             Literal(int(start), datatype=XSD.integer)))
            self._graph.add((mention, self._nif_end_index_property_uri,
                             Literal(int(end), datatype=XSD.integer)))

            # TODO: Fix values so that they aren't displayed in scientific notation
            self._graph.add((mention, self.its_ta_confidence_property_uri,
                             Literal(float(self._format_confidence_score(mention_entry)),
                                     datatype=XSD.float)))
            self._graph.add((mention, self.its_ta_ident_ref_property_uri,
                             self._dbr_prefix[entity_uri]))

            if in_review:
                claim.review_entities.append(entity_uri)
                for category in categories:
                    claim.review_entity_categories.append(category)
            else:
                claim.claim_entities.append(entity_uri)
                for category in categories:
                    claim.claim_entity_categories.append(category)

            for category in categories:
                category = category.replace(" ", "_")
                self._graph.add((mention, URIRef(self._dct_prefix["about"]),
                                 URIRef(self._dbc_prefix[category])))

            return mention, self._dbr_prefix[entity_uri]
        else:
            return None, None

    @staticmethod
    def _format_confidence_score(mention_entry):
        value = float(mention_entry['score'])
        rounded_to_two_decimals = round(value, 2)
        return str(rounded_to_two_decimals)

    def create_contact_vcard(self):
        atchechmedjiev_contact_vcard = URIRef(
            self._claimskg_prefix['atchechmedjiev_contact_vcard'])
        self._graph.add((atchechmedjiev_contact_vcard, RDF.type,
                         URIRef(self._vcard_prefix['Individual'])))
        self._graph.add((atchechmedjiev_contact_vcard,
                         self._vcard_prefix['hasEmail'],
                         URIRef("mailto:[email protected]")))
        self._graph.add((atchechmedjiev_contact_vcard, self._vcard_prefix['fn'],
                         Literal("Andon Tchechmedjiev")))
        return atchechmedjiev_contact_vcard

    def add_dcat_metadata(self):
        claimskg = rdflib.term.URIRef(self._claimskg_prefix['claimskg'])
        self._graph.add((claimskg, RDF.type,
                         rdflib.term.URIRef(self._dcat_prefix['Dataset'])))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dct_prefix['title']),
                         Literal("ClaimsKG")))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dct_prefix['description']),
                         Literal("ClaimsKG: A Live Knowledge Graph of Fact-Checked Claims")))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dct_prefix['issued']),
                         rdflib.term.Literal("2019-04-10", datatype=XSD.date)))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dct_prefix['modified']),
                         rdflib.term.Literal(datetime.datetime.now(), datatype=XSD.date)))

        doi_org = URIRef(self._claimskg_prefix['doi_org_instance'])
        self._graph.add((doi_org, RDF.type, URIRef(self._foaf_prefix['Organization'])))
        self._graph.add((doi_org, RDFS.label, Literal("International DOI Foundation")))
        self._graph.add((doi_org, self._foaf_prefix['homepage'],
                         URIRef("https://www.doi.org/")))

        identifier = URIRef(self._claimskg_prefix['doi_identifier'])
        self._graph.add((identifier, RDF.type, self._adms_prefix['Identifier']))
        self._graph.add((identifier, self._skos_prefix['notation'],
                         URIRef("https://doi.org/10.5281/zenodo.2628745")))
        self._graph.add((identifier, self._adms_prefix['schemaAgency'],
                         Literal("International DOI Foundation")))
        self._graph.add((identifier, self._dct_prefix['creator'], doi_org))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dct_prefix['identifier']),
                         rdflib.term.Literal("10.5281/zenodo.2628745")))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dct_prefix['language']),
                         rdflib.term.URIRef("http://id.loc.gov/vocabulary/iso639-1/en")))
        self._graph.add((claimskg,
                         rdflib.term.URIRef(self._dct_prefix['accrualPeriodicity']),
                         URIRef("http://purl.org/linked-data/sdmx/2009/code#freq-M")))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']),
                         Literal("Claims")))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']),
                         Literal("Facts")))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']),
                         Literal("Fact-checking")))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']),
                         Literal("Knowledge Graphs")))
        self._graph.add((claimskg, rdflib.term.URIRef(self._dcat_prefix['contactPoint']),
                         self.create_contact_vcard()))

        # SPARQL Distribution
        sparql_claimskg_distribution = URIRef(
            self._claimskg_prefix['sparql_claimskg_distribution'])
        self._graph.add((sparql_claimskg_distribution, RDF.type,
                         self._dcat_prefix['Distribution']))
        self._graph.add((sparql_claimskg_distribution, self._dct_prefix['title'],
                         Literal("SPARQL endpoint")))
        self._graph.add((sparql_claimskg_distribution, self._dct_prefix['description'],
                         Literal("The ClaimsKG SPARQL endpoint")))
        self._graph.add((sparql_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['issued']),
                         rdflib.term.Literal("2019-04-10", datatype=XSD.date)))
        self._graph.add((sparql_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['modified']),
                         rdflib.term.Literal(datetime.datetime.now(), datatype=XSD.date)))

        licence_document = URIRef("https://creativecommons.org/licenses/by/4.0/")
        self._graph.add((licence_document, RDF.type,
                         self._dct_prefix['LicenseDocument']))
        self._graph.add((sparql_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['license']),
                         licence_document))
        self._graph.add((sparql_claimskg_distribution,
                         rdflib.term.URIRef(self._dcat_prefix['accessURL']),
                         Literal("https://data.gesis.org/claimskg/sparql")))

        # Source code distribution
        sourcecode_claimskg_distribution = URIRef(
            self._claimskg_prefix['sourcecode_claimskg_distribution'])
        self._graph.add((sourcecode_claimskg_distribution, RDF.type,
                         self._dcat_prefix['Distribution']))
        self._graph.add((sourcecode_claimskg_distribution, self._dct_prefix['title'],
                         Literal("Source code")))
        self._graph.add((sourcecode_claimskg_distribution,
                         self._dct_prefix['description'],
                         Literal("The ClaimsKG Github repository group")))
        self._graph.add((sourcecode_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['issued']),
                         rdflib.term.Literal("2019-04-10", datatype=XSD.date)))
        self._graph.add((sourcecode_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['modified']),
                         rdflib.term.Literal(datetime.datetime.now(), datatype=XSD.date)))
        self._graph.add((sourcecode_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['license']),
                         licence_document))
        self._graph.add((sourcecode_claimskg_distribution,
                         rdflib.term.URIRef(self._dcat_prefix['accessURL']),
                         Literal("https://github.com/claimskg")))

    def generate_model(self, dataset_rows):
        row_counter = 0
        self._graph.namespace_manager = self._namespace_manager

        total_entry_count = len(dataset_rows)

        self.add_dcat_metadata()

        progress_bar = tqdm(total=total_entry_count)

        for row in dataset_rows:
            row_counter += 1
            progress_bar.update(1)

            # Instance holding claim raw information for mapping generation
            logical_claim = ClaimLogicalView()
source_site = _row_string_value(row, 'claimReview_author_name') if source_site not in self.per_source_statistics.keys(): self.per_source_statistics[source_site] = ClaimsKGStatistics() claim_review_instance = self._create_schema_claim_review( row, logical_claim) organization = self._create_organization(row, logical_claim) self._graph.add((claim_review_instance, self._schema_author_property_uri, organization)) creative_work = self._create_creative_work(row, logical_claim) self._graph.add( (claim_review_instance, self._schema_item_reviewed_property_uri, creative_work)) logical_claim.creative_work_uri = creative_work original, normalized = self._create_review_rating( row, logical_claim) self._graph.add( (claim_review_instance, rdflib.term.URIRef(self._schema_prefix['reviewRating']), original)) self._graph.add( (claim_review_instance, rdflib.term.URIRef(self._schema_prefix['reviewRating']), normalized)) # For claim review mentions entities_json = row[ 'extra_entities_claimReview_claimReviewed'] # type: str loaded_json = self._process_json(entities_json) if loaded_json: for mention_entry in loaded_json: mention, dbpedia_entity = self._create_mention( mention_entry, logical_claim, True) if mention: self._graph.add( (creative_work, self._schema_mentions_property_uri, mention)) # For Creative Work mentions body_entities_json = row['extra_entities_body'] loaded_body_json = self._process_json(body_entities_json) if loaded_body_json: for mention_entry in loaded_body_json: mention, dbpedia_entity = self._create_mention( mention_entry, logical_claim, False) if mention: self._graph.add( (claim_review_instance, self._schema_mentions_property_uri, mention)) self._logical_view_claims.append(logical_claim) self.global_statistics.compute_stats_for_review(logical_claim) self.per_source_statistics[source_site].compute_stats_for_review( logical_claim) progress_bar.close() def _process_json(self, json_string): loaded_json = [] if json_string: json_string = re.sub("\",\"\"", ",\"", json_string) json_string = re.sub('"\n\t\"', "", json_string) json_string = re.sub('}\]\[\]', '}]', json_string) if json_string == "[[][]]": loaded_json = [] else: try: loaded_json = json.loads(json_string) except ValueError: loaded_json = None return loaded_json def export_rdf(self, format): print("\nGlobal dataset statistics") self.global_statistics.output_stats() print("\nPer source site statistics") for site in self.per_source_statistics.keys(): print("\n\n{site} statistics...".format(site=site)) self.per_source_statistics[site].output_stats() graph_serialization = self._graph.serialize(format=format, encoding='utf-8') return graph_serialization def reconcile_claims(self, embeddings, theta, keyword_weight, link_weight, text_weight, entity_weight, mappings_file_path=None, seed=None, samples=None): reconciler = FactReconciler(embeddings, self._use_caching, mappings_file_path, self._logical_view_claims, theta, keyword_weight, link_weight, text_weight, entity_weight, seed=seed, samples=samples) mappings = reconciler.generate_mappings() for mapping in mappings: if mapping is not None and mapping[ 1] is not None and mapping[1] != (None, None): source = mapping[1][0] target = mapping[1][1] self._graph.add((source.creative_work_uri, OWL.sameAs, target.creative_work_uri)) def materialize_indirect_claim_links(self): mdg = rdflib_to_networkx_multidigraph(self._graph) def align_duplicated(self): count = len(self._logical_view_claims) total = int(count * (count - 1) / 2) result = [ pair for pair in tqdm(itertools.combinations(range(count), 2), 
total=total) if self.compare_claim_titles(self._logical_view_claims[pair[0]], self._logical_view_claims[pair[1]]) ] for pair in result: self._graph.add( (self._creative_works_index[pair[0]], self._owl_same_as, self._creative_works_index[pair[1]])) self.global_statistics.count_mapping() self.per_source_statistics[self._logical_view_claims[ pair[0]].claimreview_author].count_mapping() def compare_claim_titles(self, claim_a, claim_b): return self._normalize_label(claim_a.title) == self._normalize_label( claim_b.title) def _normalize_label(self, label): return label.strip().lower().replace("\"", "").replace("'", "")
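
# --- Hedged example (not part of the source above) ---
# A minimal sketch of how a NamespaceManager like the `self._namespace_manager`
# assigned in generate_model() could be assembled, so that export_rdf() emits
# compact prefixed names. The prefix selection is an assumption based on the
# vocabularies used above; the namespace URIs themselves are the standard ones.
from rdflib import Graph
from rdflib.namespace import Namespace, NamespaceManager

def build_claimskg_namespace_manager():
    nm = NamespaceManager(Graph())
    nm.bind("dct", Namespace("http://purl.org/dc/terms/"), override=False)
    nm.bind("dcat", Namespace("http://www.w3.org/ns/dcat#"), override=False)
    nm.bind("schema", Namespace("http://schema.org/"), override=False)
    nm.bind("owl", Namespace("http://www.w3.org/2002/07/owl#"), override=False)
    return nm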
queries = [] generate_queries (data, queries, str(NS['sp'][model])) return queries # Query builder state variables main_types = [] data = {} loaded = False # Initialize the namespace manager object namespace_manager = NamespaceManager(Graph()) # Import the namespaces into the namespace manager for ns in NS.keys(): namespace_manager.bind(ns, NS[ns], override=False) # Parse the ontology when necessary if not rdf_ontology.api_types: rdf_ontology.parse_ontology(open(ONTOLOGY_PATH).read()) # Build a list of data types that need to be added to the data definitions for t in rdf_ontology.api_types: if t.is_statement or len(t.calls) > 0 or rdf_ontology.sp.Component in [x.uri for x in t.parents]: main_types.append(t) # Build the data definitions object with each data type for t in main_types: generate_data_for_type(t, data)
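
# Hedged usage sketch (not from the source above): the module-level
# namespace_manager built from NS can also shorten full URIs to prefixed
# names via rdflib's compute_qname, which is convenient when printing the
# generated test queries. `to_prefixed_name` is a hypothetical helper.
def to_prefixed_name(uri):
    prefix, _namespace, local_name = namespace_manager.compute_qname(uri)
    return "%s:%s" % (prefix, local_name)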
def create_graph():
    import pymysql as mdb
    from rdflib import URIRef, Literal, Namespace, Graph
    from rdflib.namespace import NamespaceManager, RDF

    con = mdb.connect(mysql_conf['host'], mysql_conf['username'],
                      mysql_conf['password'], mysql_conf['db'])
    conn = con.cursor(mdb.cursors.DictCursor)

    g = Graph()
    namespace_manager = NamespaceManager(g)
    # Prefixes must be bound to absolute namespace URIs; bare strings such as
    # 'rdf:' or 'foaf:' are unresolvable relative URIs. FOAF is declared as an
    # open Namespace because arbitrary column names are attached to it below.
    namespace_manager.bind('rdf', RDF, override=False)
    FOAF = Namespace('http://xmlns.com/foaf/0.1/')
    namespace_manager.bind('foaf', FOAF, override=False)
    # Placeholder namespace URI (assumption): the original source bound the
    # invalid relative URI 'wb:' here.
    WB = Namespace('http://example.org/weibo#')
    namespace_manager.bind('wb', WB, override=False)

    print("Dump users")
    conn.execute("SELECT * FROM user;")
    for user in conn.fetchall():
        uid = URIRef(user['uid'])
        g.add((uid, RDF.type, FOAF.Person))
        for k, v in user.items():
            if k in ['uid']:
                continue
            g.add((uid, FOAF[k], Literal(v)))

    print("Dump user relations")
    conn.execute("SELECT * FROM userrelation;")
    for user_rel in conn.fetchall():
        suid = URIRef(user_rel['suid'])
        tuid = URIRef(user_rel['tuid'])
        g.add((suid, FOAF.knows, tuid))

    print("Dump weibo")
    conn.execute("SELECT * FROM weibo;")
    for weibo in conn.fetchall():
        uid = URIRef(weibo['uid'])
        mid = URIRef(weibo['mid'])
        g.add((mid, RDF.type, WB.Post))
        g.add((uid, FOAF.posted, mid))
        for k, v in weibo.items():
            if k in ['uid', 'mid']:
                continue
            g.add((mid, WB[k], Literal(v)))

    print("Dump weibo relations")
    conn.execute("SELECT * FROM weiborelation;")
    for weibo_rel in conn.fetchall():
        smid = URIRef(weibo_rel['smid'])
        tmid = URIRef(weibo_rel['tmid'])
        g.add((smid, WB.shared, tmid))

    g.serialize(destination=os.path.join(path, "data/weibo.nt"), format='nt')
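
# Illustrative companion to create_graph() (not in the original source):
# N-Triples output never shows the bound prefixes, because the format has no
# prefix syntax, so a Turtle serialization is needed to see the
# NamespaceManager bindings in the output.
def serialize_with_prefixes(graph, destination):
    # Turtle uses the prefixes bound on graph.namespace_manager
    graph.serialize(destination=destination, format='turtle')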
'''Returns a list of test sparql queries for the given model''' queries = [] generate_queries (data, queries, str(NS['sp'][model])) return queries # Query builder state variables main_types = [] data = {} loaded = False # Initialize the namespace manager object namespace_manager = NamespaceManager(Graph()) # Import the namespaces into the namespace manager for ns in NS.keys(): namespace_manager.bind(ns, NS[ns], override=False) # Parse the ontology when necessary if not rdf_ontology.api_types: rdf_ontology.parse_ontology(open(APP_PATH + '/data/smart.owl').read()) # Build a list of data types that need to be added to the data definitions for t in rdf_ontology.api_types: if t.is_statement or len(t.calls) > 0 or rdf_ontology.sp.Component in [x.uri for x in t.parents]: main_types.append(t) # Build the data definitions object with each data type for t in main_types: generate_data_for_type(t, data)
# Host config HOSTNAME = socket.gethostname() # S3 config os.environ["AWS_SHARED_CREDENTIALS_FILE"] = "~/.aws/credentials" ARCHIVE_BUCKET = "archive.tbrc.org" OCR_OUTPUT_BUCKET = "ocr.bdrc.io" S3 = boto3.resource("s3") S3_client = boto3.client("s3") archive_bucket = S3.Bucket(ARCHIVE_BUCKET) ocr_output_bucket = S3.Bucket(OCR_OUTPUT_BUCKET) # URI config BDR = Namespace("http://purl.bdrc.io/resource/") NSM = NamespaceManager(rdflib.Graph()) NSM.bind("bdr", BDR) # s3 bucket directory config SERVICE = "vision" BATCH_PREFIX = "batch" IMAGES = "images" OUTPUT = "output" INFO_FN = "info.json" # local directory config DATA_PATH = Path("./archive") IMAGES_BASE_DIR = DATA_PATH / IMAGES OCR_BASE_DIR = DATA_PATH / OUTPUT CHECK_POINT_FN = DATA_PATH / "checkpoint.json" # Checkpoint config
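
# Hedged example (the work ID below is an illustrative placeholder): the NSM
# manager above is typically used to shorten full BDRC resource URIs to CURIEs.
work_uri = BDR["W22084"]
prefix, namespace, local_name = NSM.compute_qname(work_uri)
assert (prefix, local_name) == ("bdr", "W22084")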
BIBO = Namespace('http://purl.org/ontology/bibo/')
FOAF = Namespace('http://xmlns.com/foaf/0.1/')
SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
OBO = Namespace('http://purl.obolibrary.org/obo/')
CONVERIS = Namespace('http://localhost/ontology/converis#')
FHP = Namespace('http://vivo.fredhutch.org/ontology/publications#')
FHD = Namespace('http://vivo.fredhutch.org/ontology/display#')
#tmp graph for in memory graphs
TMP = Namespace('http://localhost/tmp#')

namespaces = {}
# take a snapshot with list(); iterating vars() directly would raise a
# RuntimeError once the loop variables are bound into the module namespace
for k, o in list(vars().items()):
    if isinstance(o, (Namespace, ClosedNamespace)):
        namespaces[k] = o

ns_mgr = NamespaceManager(Graph())
for k, v in namespaces.items():
    ns_mgr.bind(k.lower(), v)

rq_prefixes = u"\n".join("prefix %s: <%s>" % (k.lower(), v)
                         for k, v in namespaces.items())

# a tuple membership test, not a substring test against one long string
prefixes = u"\n ".join("%s: %s" % (k.lower(), v)
                       for k, v in namespaces.items()
                       if k not in ('RDF', 'RDFS', 'OWL', 'XSD'))
#namespace setup complete
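
# Hedged usage sketch: rq_prefixes is meant to be prepended to SPARQL query
# bodies so that the prefixed names resolve. The query itself is illustrative.
sample_query = rq_prefixes + u"""
SELECT ?pub WHERE { ?pub a bibo:Document }
"""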
from django.core.exceptions import ValidationError from rdflib import Graph, BNode from rdflib.collection import Collection from rdflib.namespace import Namespace, NamespaceManager, DC, DCTERMS, RDF, RDFS from rdflib.plugin import register from rdflib.plugins.serializers.rdfxml import XMLLANG, OWL_NS, XMLBASE from rdflib.plugins.serializers.xmlwriter import XMLWriter from rdflib.serializer import Serializer from rdflib.term import Literal, URIRef from rdflib.util import first HSTERMS = Namespace("https://www.hydroshare.org/terms/") RDFS1 = Namespace("http://www.w3.org/2000/01/rdf-schema#") NAMESPACE_MANAGER = NamespaceManager(Graph()) NAMESPACE_MANAGER.bind('hsterms', HSTERMS, override=False) NAMESPACE_MANAGER.bind("rdfs1", RDFS1, override=False) NAMESPACE_MANAGER.bind('dc', DC, override=False) NAMESPACE_MANAGER.bind('dcterms', DCTERMS, override=False) class RDF_MetaData_Mixin(object): """ A mixin for MetaData objects which store their metadata in generic relations. If metadata outside of generic relations need to be used, you may extend ingest_metadata and get_rdf_graph to include the other metadata elements """ def rdf_subject(self): raise NotImplementedError("RDF_Metadata_Mixin implementations must implement rdf_subject") def rdf_metadata_subject(self):
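
# Minimal sketch (subject URI and title are illustrative, not HydroShare
# data): a graph wired to the shared NAMESPACE_MANAGER serializes with the
# prefixes bound above instead of auto-generated ns1/ns2 prefixes.
def example_hsterms_graph():
    g = Graph()
    g.namespace_manager = NAMESPACE_MANAGER
    subject = URIRef("https://www.hydroshare.org/resource/abc123")
    g.add((subject, DC.title, Literal("Example resource")))
    return g.serialize(format="turtle")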
def main(): from optparse import OptionParser op = OptionParser( 'usage: %prog [options] factFile1 factFile2 ... factFileN') op.add_option( '--why', default=None, help='Specifies the goals to solve for using the non-naive methods' + 'see --method') op.add_option( '--closure', action='store_true', default=False, help='Whether or not to serialize the inferred triples' + ' along with the original triples. Otherwise ' + '(the default behavior), serialize only the inferred triples') op.add_option( '--imports', action='store_true', default=False, help='Whether or not to follow owl:imports in the fact graph') op.add_option( '--output', default='n3', metavar='RDF_FORMAT', choices=[ 'xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml', 'conflict', 'man-owl' ], help= "Serialize the inferred triples and/or original RDF triples to STDOUT " + "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', " + "or 'n3') or to print a summary of the conflict set (from the RETE " + "network) if the value of this option is 'conflict'. If the the " + " value is 'rif' or 'rif-xml', Then the rules used for inference " + "will be serialized as RIF. If the value is 'pml' and --why is used, " + " then the PML RDF statements are serialized. If output is " + "'proof-graph then a graphviz .dot file of the proof graph is printed. " + "Finally if the value is 'man-owl', then the RDF facts are assumed " + "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default" ) op.add_option( '--class', dest='classes', action='append', default=[], metavar='QNAME', help='Used with --output=man-owl to determine which ' + 'classes within the entire OWL/RDF are targetted for serialization' + '. Can be used more than once') op.add_option( '--hybrid', action='store_true', default=False, help='Used with with --method=bfp to determine whether or not to ' + 'peek into the fact graph to identify predicates that are both ' + 'derived and base. This is expensive for large fact graphs' + 'and is explicitely not used against SPARQL endpoints') op.add_option( '--property', action='append', dest='properties', default=[], metavar='QNAME', help='Used with --output=man-owl or --extract to determine which ' + 'properties are serialized / extracted. Can be used more than once') op.add_option( '--normalize', action='store_true', default=False, help= "Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]" + "The default is %default") op.add_option( '--ddlGraph', default=False, help= "The location of a N3 Data Description document describing the IDB predicates" ) op.add_option( '--input-format', default='xml', dest='inputFormat', metavar='RDF_FORMAT', choices=['xml', 'trix', 'n3', 'nt', 'rdfa'], help= "The format of the RDF document(s) which serve as the initial facts " + " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', " + "or 'rdfa'. The default is %default") op.add_option( '--safety', default='none', metavar='RULE_SAFETY', choices=['loose', 'strict', 'none'], help="Determines how to handle RIF Core safety. A value of 'loose' " + " means that unsafe rules will be ignored. A value of 'strict' " + " will cause a syntax exception upon any unsafe rule. 
A value of " + "'none' (the default) does nothing") op.add_option( '--pDSemantics', action='store_true', default=False, help= 'Used with --dlp to add pD semantics ruleset for semantics not covered ' + 'by DLP but can be expressed in definite Datalog Logic Programming' + ' The default is %default') op.add_option( '--stdin', action='store_true', default=False, help= 'Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ' ) op.add_option( '--ns', action='append', default=[], metavar="PREFIX=URI", help='Register a namespace binding (QName prefix to a base URI). This ' + 'can be used more than once') op.add_option( '--rules', default=[], action='append', metavar='PATH_OR_URI', help='The Notation 3 documents to use as rulesets for the RETE network' + '. Can be specified more than once') op.add_option('-d', '--debug', action='store_true', default=True, help='Include debugging output') op.add_option( '--strictness', default='defaultBase', metavar='DDL_STRICTNESS', choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'], help= 'Used with --why to specify whether to: *not* check if predicates are ' + ' both derived and base (loose), if they are, mark as derived (defaultDerived) ' + 'or as base (defaultBase) predicates, else raise an exception (harsh)') op.add_option( '--method', default='naive', metavar='reasoning algorithm', choices=['gms', 'bfp', 'naive'], help='Used with --why to specify how to evaluate answers for query. ' + 'One of: gms, sld, bfp, naive') op.add_option( '--firstAnswer', default=False, action='store_true', help= 'Used with --why to determine whether to fetch all answers or just ' + 'the first') op.add_option( '--edb', default=[], action='append', metavar='EXTENSIONAL_DB_PREDICATE_QNAME', help= 'Used with --why/--strictness=defaultDerived to specify which clashing ' + 'predicate will be designated as a base predicate') op.add_option( '--idb', default=[], action='append', metavar='INTENSIONAL_DB_PREDICATE_QNAME', help= 'Used with --why/--strictness=defaultBase to specify which clashing ' + 'predicate will be designated as a derived predicate') op.add_option( '--hybridPredicate', default=[], action='append', metavar='PREDICATE_QNAME', help= 'Used with --why to explicitely specify a hybrid predicate (in both ' + ' IDB and EDB) ') op.add_option( '--noMagic', default=[], action='append', metavar='DB_PREDICATE_QNAME', help='Used with --why to specify that the predicate shouldnt have its ' + 'magic sets calculated') op.add_option( '--filter', action='append', default=[], metavar='PATH_OR_URI', help= 'The Notation 3 documents to use as a filter (entailments do not particpate in network)' ) op.add_option( '--ruleFacts', action='store_true', default=False, help="Determines whether or not to attempt to parse initial facts from " + "the rule graph. The default is %default") op.add_option( '--builtins', default=False, metavar='PATH_TO_PYTHON_MODULE', help="The path to a python module with function definitions (and a " + "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations" ) op.add_option( '--dlp', action='store_true', default=False, help= 'Use Description Logic Programming (DLP) to extract rules from OWL/RDF. 
The default is %default' ) op.add_option( '--sparqlEndpoint', action='store_true', default=False, help= 'Indicates that the sole argument is the URI of a SPARQL endpoint to query' ) op.add_option( '--ontology', action='append', default=[], metavar='PATH_OR_URI', help= 'The path to an OWL RDF/XML graph to use DLP to extract rules from ' + '(other wise, fact graph(s) are used) ') op.add_option( '--ontologyFormat', default='xml', dest='ontologyFormat', metavar='RDF_FORMAT', choices=['xml', 'trix', 'n3', 'nt', 'rdfa'], help= "The format of the OWL RDF/XML graph specified via --ontology. The default is %default" ) op.add_option( '--builtinTemplates', default=None, metavar='N3_DOC_PATH_OR_URI', help= 'The path to an N3 document associating SPARQL FILTER templates to ' + 'rule builtins') op.add_option('--negation', action='store_true', default=False, help='Extract negative rules?') op.add_option( '--normalForm', action='store_true', default=False, help='Whether or not to reduce DL axioms & LP rules to a normal form') (options, facts) = op.parse_args() nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'} for nsBind in options.ns: pref, nsUri = nsBind.split('=') nsBinds[pref] = nsUri namespace_manager = NamespaceManager(Graph()) if options.sparqlEndpoint: factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0])) options.hybrid = False else: factGraph = Graph() ruleSet = Ruleset() for fileN in options.rules: if options.ruleFacts and not options.sparqlEndpoint: factGraph.parse(fileN, format='n3') print("Parsing RDF facts from ", fileN) if options.builtins: import imp userFuncs = imp.load_source('builtins', options.builtins) rs = HornFromN3(fileN, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS) else: rs = HornFromN3(fileN) nsBinds.update(rs.nsMapping) ruleSet.formulae.extend(rs) #ruleGraph.parse(fileN, format='n3') ruleSet.nsMapping = nsBinds for prefix, uri in list(nsBinds.items()): namespace_manager.bind(prefix, uri, override=False) closureDeltaGraph = Graph() closureDeltaGraph.namespace_manager = namespace_manager factGraph.namespace_manager = namespace_manager if not options.sparqlEndpoint: for fileN in facts: factGraph.parse(fileN, format=options.inputFormat) if options.imports: for owlImport in factGraph.objects(predicate=OWL_NS.imports): factGraph.parse(owlImport) print("Parsed Semantic Web Graph.. 
", owlImport) if not options.sparqlEndpoint and facts: for pref, uri in factGraph.namespaces(): nsBinds[pref] = uri if options.stdin: assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint" factGraph.parse(sys.stdin, format=options.inputFormat) #Normalize namespace mappings #prune redundant, rdflib-allocated namespace prefix mappings newNsMgr = NamespaceManager(factGraph) from FuXi.Rete.Util import CollapseDictionary for k, v in list( CollapseDictionary( dict([(k, v) for k, v in factGraph.namespaces()])).items()): newNsMgr.bind(k, v) factGraph.namespace_manager = newNsMgr if options.normalForm: NormalFormReduction(factGraph) if not options.sparqlEndpoint: workingMemory = generateTokenSet(factGraph) if options.builtins: import imp userFuncs = imp.load_source('builtins', options.builtins) rule_store, rule_graph, network = SetupRuleStore( makeNetwork=True, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS) else: rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True) network.inferredFacts = closureDeltaGraph network.nsMap = nsBinds if options.dlp: from FuXi.DLP.DLNormalization import NormalFormReduction if options.ontology: ontGraph = Graph() for fileN in options.ontology: ontGraph.parse(fileN, format=options.ontologyFormat) for prefix, uri in ontGraph.namespaces(): nsBinds[prefix] = uri namespace_manager.bind(prefix, uri, override=False) if options.sparqlEndpoint: factGraph.store.bind(prefix, uri) else: ontGraph = factGraph NormalFormReduction(ontGraph) dlp = network.setupDescriptionLogicProgramming( ontGraph, addPDSemantics=options.pDSemantics, constructNetwork=False, ignoreNegativeStratus=options.negation, safety=safetyNameMap[options.safety]) ruleSet.formulae.extend(dlp) if options.output == 'rif' and not options.why: for rule in ruleSet: print(rule) if options.negation: for nRule in network.negRules: print(nRule) elif options.output == 'man-owl': cGraph = network.closureGraph(factGraph, readOnly=False) cGraph.namespace_manager = namespace_manager Individual.factoryGraph = cGraph if options.classes: mapping = dict(namespace_manager.namespaces()) for c in options.classes: pref, uri = c.split(':') print(Class(URIRef(mapping[pref] + uri)).__repr__(True)) elif options.properties: mapping = dict(namespace_manager.namespaces()) for p in options.properties: pref, uri = p.split(':') print(Property(URIRef(mapping[pref] + uri))) else: for p in AllProperties(cGraph): print(p.identifier, first(p.label)) print(repr(p)) for c in AllClasses(cGraph): if options.normalize: if c.isPrimitive(): primAnc = [ sc for sc in c.subClassOf if sc.isPrimitive() ] if len(primAnc) > 1: warnings.warn( "Branches of primitive skeleton taxonomy" + " should form trees: %s has %s primitive parents: %s" % (c.qname, len(primAnc), primAnc), UserWarning, 1) children = [desc for desc in c.subSumpteeIds()] for child in children: for otherChild in [ o for o in children if o is not child ]: if not otherChild in [ c.identifier for c in Class(child).disjointWith ]: # and \ warnings.warn( "Primitive children (of %s) " % (c.qname) + \ "must be mutually disjoint: %s and %s" % ( Class(child).qname, Class(otherChild).qname), UserWarning, 1) # if not isinstance(c.identifier, BNode): print(c.__repr__(True)) if not options.why: # Naive construction of graph for rule in ruleSet: network.buildNetworkFromClause(rule) magicSeeds = [] if options.why: builtinTemplateGraph = Graph() if options.builtinTemplates: builtinTemplateGraph = Graph().parse(options.builtinTemplates, format='n3') factGraph.templateMap = \ 
dict([(pred, template) for pred, _ignore, template in builtinTemplateGraph.triples( (None, TEMPLATES.filterTemplate, None))]) goals = [] query = ParseSPARQL(options.why) network.nsMap['pml'] = PML network.nsMap['gmp'] = GMP_NS network.nsMap['owl'] = OWL_NS nsBinds.update(network.nsMap) network.nsMap = nsBinds if not query.prologue: query.prologue = Prologue(None, []) query.prologue.prefixBindings.update(nsBinds) else: for prefix, nsInst in list(nsBinds.items()): if prefix not in query.prologue.prefixBindings: query.prologue.prefixBindings[prefix] = nsInst print("query.prologue", query.prologue) print("query.query", query.query) print("query.query.whereClause", query.query.whereClause) print("query.query.whereClause.parsedGraphPattern", query.query.whereClause.parsedGraphPattern) goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern( query.query.whereClause.parsedGraphPattern, query.prologue).patterns]) # dPreds=[]# p for s, p, o in goals ] # print("goals", goals) magicRuleNo = 0 bottomUpDerivedPreds = [] # topDownDerivedPreds = [] defaultBasePreds = [] defaultDerivedPreds = set() hybridPredicates = [] mapping = dict(newNsMgr.namespaces()) for edb in options.edb: pref, uri = edb.split(':') defaultBasePreds.append(URIRef(mapping[pref] + uri)) noMagic = [] for pred in options.noMagic: pref, uri = pred.split(':') noMagic.append(URIRef(mapping[pref] + uri)) if options.ddlGraph: ddlGraph = Graph().parse(options.ddlGraph, format='n3') # @TODO: should also get hybrid predicates from DDL graph defaultDerivedPreds = IdentifyDerivedPredicates( ddlGraph, Graph(), ruleSet) else: for idb in options.idb: pref, uri = idb.split(':') defaultDerivedPreds.add(URIRef(mapping[pref] + uri)) defaultDerivedPreds.update( set([p == RDF.type and o or p for s, p, o in goals])) for hybrid in options.hybridPredicate: pref, uri = hybrid.split(':') hybridPredicates.append(URIRef(mapping[pref] + uri)) if options.method == 'gms': for goal in goals: goalSeed = AdornLiteral(goal).makeMagicPred() print("Magic seed fact (used in bottom-up evaluation)", goalSeed) magicSeeds.append(goalSeed.toRDFTuple()) if noMagic: print("Predicates whose magic sets will not be calculated") for p in noMagic: print("\t", factGraph.qname(p)) for rule in MagicSetTransformation( factGraph, ruleSet, goals, derivedPreds=bottomUpDerivedPreds, strictCheck=nameMap[options.strictness], defaultPredicates=(defaultBasePreds, defaultDerivedPreds), noMagic=noMagic): magicRuleNo += 1 network.buildNetworkFromClause(rule) if len(list(ruleSet)): print("reduction in size of program: %s (%s -> %s clauses)" % (100 - (float(magicRuleNo) / float(len(list(ruleSet)))) * 100, len(list(ruleSet)), magicRuleNo)) start = time.time() network.feedFactsToAdd(generateTokenSet(magicSeeds)) if not [ rule for rule in factGraph.adornedProgram if len(rule.sip) ]: warnings.warn( "Using GMS sideways information strategy with no " + "information to pass from query. 
Falling back to " + "naive method over given facts and rules") network.feedFactsToAdd(workingMemory) sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds" % sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds" % sTime print("Time to calculate closure on working memory: ", sTimeStr) if options.output == 'rif': print("Rules used for bottom-up evaluation") if network.rules: for clause in network.rules: print(clause) else: for clause in factGraph.adornedProgram: print(clause) if options.output == 'conflict': network.reportConflictSet() elif options.method == 'bfp': topDownDPreds = defaultDerivedPreds if options.builtinTemplates: builtinTemplateGraph = Graph().parse(options.builtinTemplates, format='n3') builtinDict = dict([ (pred, template) for pred, _ignore, template in builtinTemplateGraph.triples((None, TEMPLATES.filterTemplate, None)) ]) else: builtinDict = None topDownStore = TopDownSPARQLEntailingStore( factGraph.store, factGraph, idb=ruleSet, DEBUG=options.debug, derivedPredicates=topDownDPreds, templateMap=builtinDict, nsBindings=network.nsMap, identifyHybridPredicates=options.hybrid if options.method == 'bfp' else False, hybridPredicates=hybridPredicates) targetGraph = Graph(topDownStore) for pref, nsUri in list(network.nsMap.items()): targetGraph.bind(pref, nsUri) start = time.time() # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal) for goal in goals], # targetGraph) # query = queryLiteral.asSPARQL() # print("Goal to solve ", query) sTime = time.time() - start result = targetGraph.query(options.why, initNs=network.nsMap) if result.askAnswer: sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds" % sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds" % sTime print("Time to reach answer ground goal answer of %s: %s" % (result.askAnswer[0], sTimeStr)) else: for rt in result: sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds" % sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds" % sTime if options.firstAnswer: break print( "Time to reach answer %s via top-down SPARQL sip strategy: %s" % (rt, sTimeStr)) if options.output == 'conflict' and options.method == 'bfp': for _network, _goal in topDownStore.queryNetworks: print(network, _goal) _network.reportConflictSet(options.debug) for query in topDownStore.edbQueries: print(query.asSPARQL()) elif options.method == 'naive': start = time.time() network.feedFactsToAdd(workingMemory) sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds" % sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds" % sTime print("Time to calculate closure on working memory: ", sTimeStr) print(network) if options.output == 'conflict': network.reportConflictSet() for fileN in options.filter: for rule in HornFromN3(fileN): network.buildFilterNetworkFromClause(rule) if options.negation and network.negRules and options.method in [ 'both', 'bottomUp' ]: now = time.time() rt = network.calculateStratifiedModel(factGraph) print( "Time to calculate stratified, stable model (inferred %s facts): %s" % (rt, time.time() - now)) if options.filter: print("Applying filter to entailed facts") network.inferredFacts = network.filteredFacts if options.closure and options.output in RDF_SERIALIZATION_FORMATS: cGraph = network.closureGraph(factGraph) cGraph.namespace_manager = namespace_manager print( cGraph.serialize(destination=None, format=options.output, base=None)) elif options.output and options.output in RDF_SERIALIZATION_FORMATS: print( network.inferredFacts.serialize(destination=None, 
format=options.output, base=None))
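
# Hedged sketch of the namespace-pruning step above (CollapseDictionary plus
# a fresh NamespaceManager), written without the FuXi dependency: rebuild the
# manager keeping only an explicit set of prefixes, dropping rdflib's
# auto-allocated ones.
from rdflib import Graph
from rdflib.namespace import NamespaceManager

def rebind_namespaces(graph, keep):
    nm = NamespaceManager(Graph())
    for prefix, uri in graph.namespaces():
        if prefix in keep:
            nm.bind(prefix, uri, override=True)
    graph.namespace_manager = nm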
def addattributes():
    carConfig = getCarConfig()
    embeddedSignals = getConfigNames(carConfig)
    url = SERVER_URL

    # Binding of namespaces
    namespace_manager = NamespaceManager(Graph())
    namespace_manager.bind("rdfs", rdfs, override=False)
    #namespace_manager.bind("vss", vss, override=False)
    namespace_manager.bind("ssn", ssn, override=False)
    namespace_manager.bind("sosa", sosa, override=False)
    namespace_manager.bind("geo", geo, override=False)
    namespace_manager.bind("sf", sf, override=False)
    namespace_manager.bind("qudt-1-1", qudt11, override=False)
    namespace_manager.bind("qudt-unit-1-1", qudtunit11, override=False)
    namespace_manager.bind("dbr", dbr, override=False)
    namespace_manager.bind("vso", vso, override=False)
    namespace_manager.bind("step", step, override=False)
    namespace_manager.bind("time", otime, override=False)
    g.namespace_manager = namespace_manager

    # Creation of triples about the car
    g.add((MyCar, RDF.type, sosa.FeatureOfInterest))
    g.add((MyCar, RDF.type, vso.Automobile))
    g.add((MyCar, RDF.type, geo.Feature))

    # For every sensor provided as an input, if it is known, it is attached
    # to a sensorType (from DBpedia), a unit and an observable property.
    for signal in getVSS(embeddedSignals):
        sensorType = signal.sensor
        unit = signal.unit
        observableProperty = signal.uri.split('#')[-1]
        Sensor = BNode()
        ObservableProperty = signal.uri
        g.add((Sensor, RDF.type, vso.FeatureValue))
        g.add((Sensor, RDF.type, sensorType))
        g.add((MyCar, vso.feature, Sensor))
        g.add((Sensor, sosa.observes, ObservableProperty))
        g.add((ObservableProperty, rdfs.label, Literal(observableProperty)))
        g.add((ObservableProperty, RDF.type, sosa.ObservableProperty))
        g.add((ObservableProperty, qudt11.Unit, unit))

    # The graph is stored; a context manager closes the file even on error
    with open("outputFile.ttl", "w") as output_file:
        output_file.write(g.serialize(format='turtle'))

    # Return the graph
    # TODO: check the missing prefixes (see the unbound_namespaces sketch below)
    return g.serialize(format='turtle')
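
# Sketch addressing the "check the missing prefixes" TODO above (illustrative,
# not part of the original code): report predicate namespaces that have no
# binding in the graph's NamespaceManager.
from rdflib.namespace import split_uri

def unbound_namespaces(graph):
    bound = {str(uri) for _prefix, uri in graph.namespace_manager.namespaces()}
    missing = set()
    for _s, p, _o in graph:
        try:
            ns, _local = split_uri(p)
        except Exception:
            continue  # URI cannot be split into namespace + local name
        if str(ns) not in bound:
            missing.add(str(ns))
    return missing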
def sparql_to_rgraph(query):
    """
    Transform SPARQL query into its R-graph representation. All BNodes are
    scoped within their BGPs and assigned a fresh and unique BNode label.
    All non-projection variables are assigned a fresh and unique BNode label
    on global scope.

    :param query: SPARQL algebra expression
    :return: rdflib.Graph
    """
    from rdflib import Graph, URIRef, BNode, Literal
    from rdflib.term import Variable
    from rdflib.namespace import RDF

    def ground_projected_variables(r_graph):
        """
        Ground projected variables to avoid their removal during minimisation.

        :param r_graph:
        :return:
        """
        proj_vars = set(
            r_graph.objects(None, URIRef("http://www.dfki.de/voc#var")))

        for proj_var in proj_vars:
            r_graph.skolemize(new_graph=r_graph, bnode=BNode(proj_var))
            r_graph.remove((None, None, BNode(proj_var)))

        return r_graph

    def sparql_iter(expr, ctx, ctx_graph):
        """
        Traverse SPARQL algebra expression and build rdflib.Graph

        :param expr: SPARQL algebra expression
        :param ctx: rdflib.Identifier as ctx node
        :return: rdflib.Graph for SPARQL query expression expr
        """

        def scoped_term(term):
            """
            Map one triple-pattern term to its R-graph node; the subject,
            predicate and object positions share this logic.
            """
            if isinstance(term, URIRef):
                return URIRef(term)
            if isinstance(term, Literal):
                return Literal(term)
            if isinstance(term, BNode):
                # blank nodes are scoped to the basic graph pattern
                return BNode(ctx.__str__() + "__" + term)
            if isinstance(term, Variable):
                # only variables projected out of the subquery will be visible
                if BNode(term) in set(
                        ctx_graph.objects(
                            None, URIRef("http://www.dfki.de/voc#var"))):
                    return BNode(term)
                return BNode(ctx.__str__() + "__" + term)
            return None

        # handles a triple pattern expression
        if isinstance(expr, tuple):
            # each triple pattern is a subgraph of its ctx node
            triple = BNode()
            ctx_graph.add((triple, RDF.type,
                           URIRef("http://www.dfki.de/voc#TriplePattern")))
            ctx_graph.add((ctx, URIRef("http://www.dfki.de/voc#arg"), triple))

            for position, term in zip(("s", "p", "o"), expr):
                node = scoped_term(term)
                if node is not None:
                    ctx_graph.add(
                        (triple,
                         URIRef("http://www.dfki.de/voc#" + position), node))

            return ctx_graph

        # handles a Basic Graph Pattern as n-ary JOINs
        if expr.name == "BGP":
            # each BGP is a subgraph of its ctx node
            if (ctx, RDF.type,
                    URIRef("http://www.dfki.de/voc#Join")) in ctx_graph:
                # if ctx node is a JOIN, we add all triple patterns as arguments
                for arg in set(expr['triples']):
                    ctx_graph = ctx_graph + sparql_iter(arg, ctx, ctx_graph)
            else:
                # if ctx node is NOT a JOIN, we turn the BGP into a JOIN node
                join = BNode()
                ctx_graph.add(
                    (join, RDF.type, URIRef("http://www.dfki.de/voc#Join")))
                ctx_graph.add(
                    (ctx, URIRef("http://www.dfki.de/voc#arg"), join))
                for arg in set(expr['triples']):
                    ctx_graph = ctx_graph + sparql_iter(arg, join, ctx_graph)

            return ctx_graph

        # handles a JOIN node
        if expr.name == "Join":
            # make nested JOINs n-ary operators
            if (ctx, RDF.type,
                    URIRef("http://www.dfki.de/voc#Join")) in ctx_graph:
                ctx_graph = ctx_graph + sparql_iter(expr['p1'], ctx, ctx_graph)
                ctx_graph = ctx_graph + sparql_iter(expr['p2'], ctx, ctx_graph)
            else:
                join = BNode()
                ctx_graph.add(
                    (join, RDF.type, URIRef("http://www.dfki.de/voc#Join")))
                ctx_graph.add(
                    (ctx, URIRef("http://www.dfki.de/voc#arg"), join))
                ctx_graph = ctx_graph + sparql_iter(expr['p1'], join, ctx_graph)
                ctx_graph = ctx_graph + sparql_iter(expr['p2'], join, ctx_graph)

            return ctx_graph

        # handles a UNION node
        if expr.name == "Union":
            # make nested UNIONs n-ary operators
            if (ctx, RDF.type,
                    URIRef("http://www.dfki.de/voc#Union")) in ctx_graph:
                ctx_graph = ctx_graph + sparql_iter(expr['p1'], ctx, ctx_graph)
                ctx_graph = ctx_graph + sparql_iter(expr['p2'], ctx, ctx_graph)
            else:
                union = BNode()
                ctx_graph.add(
                    (union, RDF.type, URIRef("http://www.dfki.de/voc#Union")))
                ctx_graph.add(
                    (ctx, URIRef("http://www.dfki.de/voc#arg"), union))
                ctx_graph = ctx_graph + sparql_iter(expr['p1'], union, ctx_graph)
                ctx_graph = ctx_graph + sparql_iter(expr['p2'], union, ctx_graph)

            return ctx_graph

        # handles a PROJECT node
        if expr.name == "Project":
            # since we assume UNION normal form, we always add a UNION below
            # the SELECT node
            union = BNode()
            ctx_graph.add(
                (union, RDF.type, URIRef("http://www.dfki.de/voc#Union")))
            ctx_graph.add((ctx, URIRef("http://www.dfki.de/voc#arg"), union))
            ctx_graph = ctx_graph + sparql_iter(expr['p'], union, ctx_graph)

            return ctx_graph

    # handles a SELECT node and transforms the query into an R-graph
    if query.algebra.name == "SelectQuery":
        # every SELECT node is a query graph
        from rdflib.namespace import Namespace, NamespaceManager

        dfkiNs = Namespace("http://www.dfki.de/voc#")
        namespace_manager = NamespaceManager(Graph())
        namespace_manager.bind("dfki", dfkiNs, override=False)

        r_graph = Graph()
        r_graph.namespace_manager = namespace_manager

        select = BNode()  # a GUID is generated
        r_graph.add(
            (select, RDF.type, URIRef("http://www.dfki.de/voc#Select")))
        for proj_var in set(query.algebra.PV):
            r_graph.add((select, URIRef("http://www.dfki.de/voc#var"),
                         BNode(proj_var)))

        r_graph = r_graph + sparql_iter(query.algebra['p'], select, r_graph)

        return ground_projected_variables(r_graph)
    else:
        pass
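
# Hedged usage sketch: rdflib's prepareQuery exposes the parsed algebra that
# sparql_to_rgraph(query) consumes (query.algebra, query.algebra.PV). The
# query string and class URI are illustrative.
from rdflib.plugins.sparql import prepareQuery

example_query = prepareQuery(
    "SELECT ?s WHERE { ?s a <http://example.org/Thing> }")
print(sparql_to_rgraph(example_query).serialize(format="turtle"))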
def main(): from optparse import OptionParser op = OptionParser( 'usage: %prog [options] factFile1 factFile2 ... factFileN') op.add_option('--why', default=None, help='Specifies the goals to solve for using the non-niave methods' + 'see --method') op.add_option('--closure', action='store_true', default=False, help='Whether or not to serialize the inferred triples' + ' along with the original triples. Otherwise ' + '(the default behavior), serialize only the inferred triples') op.add_option('--imports', action='store_true', default=False, help='Whether or not to follow owl:imports in the fact graph') op.add_option('--output', default='n3', metavar='RDF_FORMAT', choices=['xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml', 'conflict', 'man-owl'], help="Serialize the inferred triples and/or original RDF triples to STDOUT "+ "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', "+ "or 'n3') or to print a summary of the conflict set (from the RETE "+ "network) if the value of this option is 'conflict'. If the the "+ " value is 'rif' or 'rif-xml', Then the rules used for inference "+ "will be serialized as RIF. If the value is 'pml' and --why is used, "+ " then the PML RDF statements are serialized. If output is "+ "'proof-graph then a graphviz .dot file of the proof graph is printed. "+ "Finally if the value is 'man-owl', then the RDF facts are assumed "+ "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default") op.add_option('--class', dest='classes', action='append', default=[], metavar='QNAME', help='Used with --output=man-owl to determine which '+ 'classes within the entire OWL/RDF are targetted for serialization'+ '. Can be used more than once') op.add_option('--hybrid', action='store_true', default=False, help='Used with with --method=bfp to determine whether or not to '+ 'peek into the fact graph to identify predicates that are both '+ 'derived and base. This is expensive for large fact graphs'+ 'and is explicitely not used against SPARQL endpoints') op.add_option('--property', action='append', dest='properties', default=[], metavar='QNAME', help='Used with --output=man-owl or --extract to determine which '+ 'properties are serialized / extracted. Can be used more than once') op.add_option('--normalize', action='store_true', default=False, help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"+ "The default is %default") op.add_option('--ddlGraph', default=False, help="The location of a N3 Data Description document describing the IDB predicates") op.add_option('--input-format', default='xml', dest='inputFormat', metavar='RDF_FORMAT', choices=['xml', 'trix', 'n3', 'nt', 'rdfa'], help="The format of the RDF document(s) which serve as the initial facts "+ " for the RETE network. One of 'xml','n3','trix', 'nt', "+ "or 'rdfa'. The default is %default") op.add_option('--safety', default='none', metavar='RULE_SAFETY', choices=['loose', 'strict', 'none'], help="Determines how to handle RIF Core safety. A value of 'loose' "+ " means that unsafe rules will be ignored. A value of 'strict' "+ " will cause a syntax exception upon any unsafe rule. 
A value of "+ "'none' (the default) does nothing") op.add_option('--pDSemantics', action='store_true', default=False, help='Used with --dlp to add pD semantics ruleset for semantics not covered '+ 'by DLP but can be expressed in definite Datalog Logic Programming'+ ' The default is %default') op.add_option('--stdin', action='store_true', default=False, help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ') op.add_option('--ns', action='append', default=[], metavar="PREFIX=URI", help='Register a namespace binding (QName prefix to a base URI). This '+ 'can be used more than once') op.add_option('--rules', default=[], action='append', metavar='PATH_OR_URI', help='The Notation 3 documents to use as rulesets for the RETE network'+ '. Can be specified more than once') op.add_option('-d', '--debug', action='store_true', default=False, help='Include debugging output') op.add_option('--strictness', default='defaultBase', metavar='DDL_STRICTNESS', choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'], help='Used with --why to specify whether to: *not* check if predicates are '+ ' both derived and base (loose), if they are, mark as derived (defaultDerived) '+ 'or as base (defaultBase) predicates, else raise an exception (harsh)') op.add_option('--method', default='naive', metavar='reasoning algorithm', choices=['gms', 'bfp', 'naive'], help='Used with --why to specify how to evaluate answers for query. '+ 'One of: gms,bfp,naive') op.add_option('--firstAnswer', default=False, action='store_true', help='Used with --why to determine whether to fetch all answers or just '+ 'the first') op.add_option('--edb', default=[], action='append', metavar='EXTENSIONAL_DB_PREDICATE_QNAME', help='Used with --why/--strictness=defaultDerived to specify which clashing '+ 'predicate will be designated as a base predicate') op.add_option('--idb', default=[], action='append', metavar='INTENSIONAL_DB_PREDICATE_QNAME', help='Used with --why/--strictness=defaultBase to specify which clashing '+ 'predicate will be designated as a derived predicate') op.add_option('--hybridPredicate', default=[], action='append', metavar='PREDICATE_QNAME', help='Used with --why to explicitely specify a hybrid predicate (in both '+ ' IDB and EDB) ') op.add_option('--noMagic', default=[], action='append', metavar='DB_PREDICATE_QNAME', help='Used with --why to specify that the predicate shouldnt have its '+ 'magic sets calculated') op.add_option('--filter', action='append', default=[], metavar='PATH_OR_URI', help='The Notation 3 documents to use as a filter (entailments do not particpate in network)') op.add_option('--ruleFacts', action='store_true', default=False, help="Determines whether or not to attempt to parse initial facts from "+ "the rule graph. The default is %default") op.add_option('--builtins', default=False, metavar='PATH_TO_PYTHON_MODULE', help="The path to a python module with function definitions (and a "+ "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations") op.add_option('--dlp', action='store_true', default=False, help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF. 
The default is %default') op.add_option('--sparqlEndpoint', action='store_true', default=False, help='Indicates that the sole argument is the URI of a SPARQL endpoint to query') op.add_option('--ontology', action='append', default=[], metavar='PATH_OR_URI', help='The path to an OWL RDF/XML graph to use DLP to extract rules from '+ '(other wise, fact graph(s) are used) ') op.add_option('--ruleFormat', default='n3', dest='ruleFormat', metavar='RULE_FORMAT', choices=['n3', 'rif'], help="The format of the rules to parse ('n3', 'rif'). The default is %default") op.add_option('--ontologyFormat', default='xml', dest='ontologyFormat', metavar='RDF_FORMAT', choices=['xml', 'trix', 'n3', 'nt', 'rdfa'], help="The format of the OWL RDF/XML graph specified via --ontology. The default is %default") op.add_option('--builtinTemplates', default=None, metavar='N3_DOC_PATH_OR_URI', help='The path to an N3 document associating SPARQL FILTER templates to '+ 'rule builtins') op.add_option('--negation', action='store_true', default=False, help='Extract negative rules?') op.add_option('--normalForm', action='store_true', default=False, help='Whether or not to reduce DL axioms & LP rules to a normal form') (options, facts) = op.parse_args() nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'} for nsBind in options.ns: pref, nsUri = nsBind.split('=') nsBinds[pref]=nsUri namespace_manager = NamespaceManager(Graph()) if options.sparqlEndpoint: factGraph = Graph(plugin.get('SPARQL', Store)(facts[0])) options.hybrid = False else: factGraph = Graph() ruleSet = Ruleset() for fileN in options.rules: if options.ruleFacts and not options.sparqlEndpoint: factGraph.parse(fileN, format='n3') print("Parsing RDF facts from %s" % fileN) if options.builtins: import imp userFuncs = imp.load_source('builtins', options.builtins) rs = HornFromN3(fileN, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS) nsBinds.update(rs.nsMapping) elif options.ruleFormat == 'rif': try: from FuXi.Horn.RIFCore import RIFCoreParser rif_parser = RIFCoreParser(location=fileN, debug=options.debug) rs = rif_parser.getRuleset() except ImportError: raise Exception( "Missing 3rd party libraries for RIF processing" ) else: rs = HornFromN3(fileN) nsBinds.update(rs.nsMapping) ruleSet.formulae.extend(rs) #ruleGraph.parse(fileN,format='n3') ruleSet.nsMapping = nsBinds for prefix, uri in list(nsBinds.items()): namespace_manager.bind(prefix, uri, override=False) closureDeltaGraph = Graph() closureDeltaGraph.namespace_manager = namespace_manager factGraph.namespace_manager = namespace_manager if not options.sparqlEndpoint: for fileN in facts: factGraph.parse(fileN, format=options.inputFormat) if options.imports: for owlImport in factGraph.objects(predicate=OWL_NS.imports): factGraph.parse(owlImport) print("Parsed Semantic Web Graph.. 
%s" % owlImport) if not options.sparqlEndpoint and facts: for pref, uri in factGraph.namespaces(): nsBinds[pref]=uri if options.stdin: assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint" factGraph.parse(sys.stdin, format=options.inputFormat) #Normalize namespace mappings #prune redundant, rdflib-allocated namespace prefix mappings newNsMgr = NamespaceManager(factGraph) from FuXi.Rete.Util import CollapseDictionary for k, v in list(CollapseDictionary(dict([(k, v) for k, v in factGraph.namespaces()])).items()): newNsMgr.bind(k, v) factGraph.namespace_manager = newNsMgr if options.normalForm: NormalFormReduction(factGraph) if not options.sparqlEndpoint: workingMemory = generateTokenSet(factGraph) if options.builtins: import imp userFuncs = imp.load_source('builtins', options.builtins) rule_store, rule_graph, network = SetupRuleStore( makeNetwork=True, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS) else: rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True) network.inferredFacts = closureDeltaGraph network.nsMap = nsBinds if options.dlp: from FuXi.DLP.DLNormalization import NormalFormReduction if options.ontology: ontGraph = Graph() for fileN in options.ontology: ontGraph.parse(fileN, format=options.ontologyFormat) for prefix, uri in ontGraph.namespaces(): nsBinds[prefix] = uri namespace_manager.bind(prefix, uri, override=False) if options.sparqlEndpoint: factGraph.store.bind(prefix, uri) else: ontGraph=factGraph NormalFormReduction(ontGraph) dlp=network.setupDescriptionLogicProgramming( ontGraph, addPDSemantics=options.pDSemantics, constructNetwork=False, ignoreNegativeStratus=options.negation, safety=safetyNameMap[options.safety]) ruleSet.formulae.extend(dlp) if options.output == 'rif' and not options.why: for rule in ruleSet: print(rule) if options.negation: for nRule in network.negRules: print(nRule) elif options.output == 'man-owl': cGraph = network.closureGraph(factGraph, readOnly=False) cGraph.namespace_manager = namespace_manager Individual.factoryGraph = cGraph if options.classes: mapping = dict(namespace_manager.namespaces()) for c in options.classes: pref, uri = c.split(':') print(Class(URIRef(mapping[pref] + uri)).__repr__(True)) elif options.properties: mapping = dict(namespace_manager.namespaces()) for p in options.properties: pref, uri = p.split(':') print(Property(URIRef(mapping[pref] + uri))) else: for p in AllProperties(cGraph): print(p.identifier, first(p.label)) print(repr(p)) for c in AllClasses(cGraph): if options.normalize: if c.isPrimitive(): primAnc = [sc for sc in c.subClassOf if sc.isPrimitive()] if len(primAnc) > 1: warnings.warn("Branches of primitive skeleton taxonomy" + " should form trees: %s has %s primitive parents: %s" % ( c.qname, len(primAnc), primAnc), UserWarning, 1) children = [desc for desc in c.subSumpteeIds()] for child in children: for otherChild in [o for o in children if o is not child]: if not otherChild in [c.identifier for c in Class(child).disjointWith]: # and\ warnings.warn("Primitive children (of %s) " % (c.qname) + "must be mutually disjoint: %s and %s" % ( Class(child).qname, Class(otherChild).qname), UserWarning, 1) # if not isinstance(c.identifier,BNode): print(c.__repr__(True)) if not options.why: #Naive construction of graph for rule in ruleSet: network.buildNetworkFromClause(rule) magicSeeds=[] if options.why: builtinTemplateGraph = Graph() if options.builtinTemplates: builtinTemplateGraph = Graph().parse(options.builtinTemplates, format='n3') factGraph.templateMap = \ dict([(pred, template) for 
pred, _ignore, template in builtinTemplateGraph.triples( (None, TEMPLATES.filterTemplate, None))]) goals = [] query = ParseSPARQL(options.why) network.nsMap['pml'] = PML network.nsMap['gmp'] = GMP_NS network.nsMap['owl'] = OWL_NS nsBinds.update(network.nsMap) network.nsMap = nsBinds if not query.prolog: query.prolog = Prolog(None, []) query.prolog.prefixBindings.update(nsBinds) else: for prefix, nsInst in list(nsBinds.items()): if prefix not in query.prolog.prefixBindings: query.prolog.prefixBindings[prefix] = nsInst goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern( query.query.whereClause.parsedGraphPattern, query.prolog).patterns]) # dPreds=[]# p for s,p,o in goals ] magicRuleNo = 0 bottomUpDerivedPreds = [] # topDownDerivedPreds = [] defaultBasePreds = [] defaultDerivedPreds = set() hybridPredicates = [] mapping = dict(newNsMgr.namespaces()) for edb in options.edb: pref, uri = edb.split(':') defaultBasePreds.append(URIRef(mapping[pref] + uri)) noMagic = [] for pred in options.noMagic: pref, uri = pred.split(':') noMagic.append(URIRef(mapping[pref] + uri)) if options.ddlGraph: ddlGraph = Graph().parse(options.ddlGraph, format='n3') # @TODO: should also get hybrid predicates from DDL graph defaultDerivedPreds=IdentifyDerivedPredicates( ddlGraph, Graph(), ruleSet) else: for idb in options.idb: pref, uri = idb.split(':') defaultDerivedPreds.add(URIRef(mapping[pref] + uri)) defaultDerivedPreds.update( set([p == RDF.type and o or p for s, p, o in goals])) for hybrid in options.hybridPredicate: pref, uri = hybrid.split(':') hybridPredicates.append(URIRef(mapping[pref]+uri)) if options.method == 'gms': for goal in goals: goalSeed=AdornLiteral(goal).makeMagicPred() print("Magic seed fact (used in bottom-up evaluation) %s" % goalSeed) magicSeeds.append(goalSeed.toRDFTuple()) if noMagic: print("Predicates whose magic sets will not be calculated") for p in noMagic: print("\t%s" % factGraph.qname(p)) for rule in MagicSetTransformation( factGraph, ruleSet, goals, derivedPreds=bottomUpDerivedPreds, strictCheck=nameMap[options.strictness], defaultPredicates=(defaultBasePreds, defaultDerivedPreds), noMagic=noMagic): magicRuleNo+=1 network.buildNetworkFromClause(rule) if len(list(ruleSet)): print("reduction in size of program: %s (%s -> %s clauses)" % ( 100 - (float(magicRuleNo) / float(len(list(ruleSet))) ) * 100, len(list(ruleSet)), magicRuleNo)) start = time.time() network.feedFactsToAdd(generateTokenSet(magicSeeds)) if not [ rule for rule in factGraph.adornedProgram if len(rule.sip)]: warnings.warn( "Using GMS sideways information strategy with no "+ "information to pass from query. 
Falling back to "+ "naive method over given facts and rules") network.feedFactsToAdd(workingMemory) sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds"%sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds"%sTime print("Time to calculate closure on working memory: %s" % sTimeStr) if options.output == 'rif': print("Rules used for bottom-up evaluation") if network.rules: for clause in network.rules: print(clause) else: for clause in factGraph.adornedProgram: print(clause) if options.output == 'conflict': network.reportConflictSet() elif options.method == 'bfp': topDownDPreds = defaultDerivedPreds if options.builtinTemplates: builtinTemplateGraph = Graph().parse(options.builtinTemplates, format='n3') builtinDict = dict([(pred, template) for pred, _ignore, template in builtinTemplateGraph.triples( (None, TEMPLATES.filterTemplate, None))]) else: builtinDict = None topDownStore=TopDownSPARQLEntailingStore( factGraph.store, factGraph, idb=ruleSet, DEBUG=options.debug, derivedPredicates=topDownDPreds, templateMap=builtinDict, nsBindings=network.nsMap, identifyHybridPredicates=options.hybrid \ if options.method == 'bfp' else False, hybridPredicates=hybridPredicates) targetGraph = Graph(topDownStore) for pref, nsUri in list(network.nsMap.items()): targetGraph.bind(pref, nsUri) start = time.time() # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal) # for goal in goals], # targetGraph) # query = queryLiteral.asSPARQL() # print >>sys.stderr, "Goal to solve ", query sTime = time.time() - start result = targetGraph.query(options.why, initNs=network.nsMap) if result.askAnswer: sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds"%sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds"%sTime print("Time to reach answer ground goal answer of %s: %s" % ( result.askAnswer[0], sTimeStr)) else: for rt in result: sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds" % sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds" % sTime if options.firstAnswer: break print( "Time to reach answer %s via top-down SPARQL sip strategy: %s" % ( rt, sTimeStr)) if options.output == 'conflict' and options.method == 'bfp': for _network, _goal in topDownStore.queryNetworks: print(_network, _goal) _network.reportConflictSet(options.debug) for query in topDownStore.edbQueries: print(query.asSPARQL()) elif options.method == 'naive': start = time.time() network.feedFactsToAdd(workingMemory) sTime = time.time() - start if sTime > 1: sTimeStr = "%s seconds"%sTime else: sTime = sTime * 1000 sTimeStr = "%s milli seconds"%sTime print("Time to calculate closure on working memory: %s" % sTimeStr) print(network) if options.output == 'conflict': network.reportConflictSet() for fileN in options.filter: for rule in HornFromN3(fileN): network.buildFilterNetworkFromClause(rule) if options.negation and network.negRules and options.method in ['both', 'bottomUp']: now=time.time() rt=network.calculateStratifiedModel(factGraph) print("Time to calculate stratified, stable model (inferred %s facts): %s" % ( rt, time.time()-now)) if options.filter: print("Applying filter to entailed facts") network.inferredFacts = network.filteredFacts if options.closure \ and options.output in RDF_SERIALIZATION_FORMATS: cGraph = network.closureGraph(factGraph) cGraph.namespace_manager = namespace_manager print(cGraph.serialize(destination=None, format=options.output, base=None)) elif options.output and options.output in RDF_SERIALIZATION_FORMATS: print(network.inferredFacts.serialize(destination=None, 
format=options.output, base=None))
# Extra Namespaces
ADMS = Namespace('http://www.w3.org/ns/adms#')
DCAT = Namespace('http://www.w3.org/ns/dcat#')
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
SCHEMA = Namespace('http://schema.org/')
SCV = Namespace('http://purl.org/NET/scovo#')
SPDX = Namespace('http://spdx.org/rdf/terms#')
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
FREQ = Namespace('http://purl.org/cld/freq/')
EUFREQ = Namespace(
    'http://publications.europa.eu/resource/authority/frequency/'
)  # noqa: E501

DCT = DCTERMS  # More common usage

namespace_manager = NamespaceManager(Graph())
namespace_manager.bind('dcat', DCAT)
namespace_manager.bind('dct', DCT)
namespace_manager.bind('foaf', FOAF)
namespace_manager.bind('hydra', HYDRA)
namespace_manager.bind('rdfs', RDFS)
namespace_manager.bind('scv', SCV)
namespace_manager.bind('skos', SKOS)
namespace_manager.bind('vcard', VCARD)
namespace_manager.bind('xsd', XSD)
namespace_manager.bind('freq', FREQ)

# Support JSON-LD in format detection
FORMAT_MAP = SUFFIX_FORMAT_MAP.copy()
FORMAT_MAP['json'] = 'json-ld'
FORMAT_MAP['jsonld'] = 'json-ld'
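
# Usage sketch for FORMAT_MAP: rdflib's guess_format accepts a custom suffix
# map, so JSON-LD catalogs are detected by file extension (file names are
# illustrative).
from rdflib.util import guess_format

assert guess_format('catalog.jsonld', FORMAT_MAP) == 'json-ld'
assert guess_format('catalog.ttl', FORMAT_MAP) == 'turtle'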
#=============================================================================== import os #=============================================================================== from rdflib import Graph from rdflib.namespace import Namespace, NamespaceManager import yaml #=============================================================================== with open(os.path.join(os.path.split(__file__)[0], 'curie_map.yaml')) as f: curie_map = yaml.load(f, Loader=yaml.Loader) SCICRUNCH_NS = NamespaceManager(Graph()) _namespaces = {} for prefix, url in curie_map.items(): ns = Namespace(url) SCICRUNCH_NS.bind(prefix, ns, override=True) _namespaces[prefix] = ns #=============================================================================== def namespaces_dict(): return _namespaces #===============================================================================
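# Hedged usage sketch, assuming curie_map.yaml defines an entry such as
# "UBERON: http://purl.obolibrary.org/obo/UBERON_"; the bound namespaces can
# build full term URIs and SCICRUNCH_NS can shorten them again.
uberon = namespaces_dict().get('UBERON')
if uberon is not None:
    term = uberon['0000955']             # full URIRef
    print(SCICRUNCH_NS.qname(term))      # e.g. 'UBERON:0000955'

#===============================================================================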
core_namespaces = {
    'dc': rdflib.namespace.DC,
    'dcterms': rdflib.namespace.DCTERMS,
    'ebucore': Namespace(
        'http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#'),
    'fcrepo': Namespace('http://fedora.info/definitions/fcrepo#'),
    'fcadmin': Namespace('info:fcsystem/graph/admin'),
    'fcres': Namespace('info:fcres'),
    'fcmain': Namespace('info:fcsystem/graph/userdata/_main'),
    'fcstruct': Namespace('info:fcsystem/graph/structure'),
    'fcsystem': Namespace('info:fcsystem/'),
    'foaf': Namespace('http://xmlns.com/foaf/0.1/'),
    'iana': Namespace('http://www.iana.org/assignments/relation/'),
    'ldp': Namespace('http://www.w3.org/ns/ldp#'),
    'pcdm': Namespace('http://pcdm.org/models#'),
    'premis': Namespace('http://www.loc.gov/premis/rdf/v1#'),
    'rdf': rdflib.namespace.RDF,
    'rdfs': rdflib.namespace.RDFS,
    'webac': Namespace('http://www.w3.org/ns/auth/acl#'),
    'xsd': rdflib.namespace.XSD,
}

# Collection of prefixes in a dict.
ns_collection = {pfx: Namespace(ns)
                 for pfx, ns in config['namespaces'].items()}
ns_collection.update(core_namespaces)

ns_mgr = NamespaceManager(Graph())

for ns, uri in ns_collection.items():
    ns_mgr.bind(ns, uri, override=False)
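
# Illustrative use of ns_mgr: 'ldp' is one of the core prefixes bound above.
ldp_container = ns_collection['ldp']['Container']
print(ns_mgr.qname(ldp_container))  # 'ldp:Container'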
import sys

from rdflib import Graph
from rdflib.namespace import Namespace, NamespaceManager
from urllib import quote
from rfc3987 import parse  # URI/IRI validation

from resources import FRAME_IT_TO_EN
from resources import FRAME_DBPO_MAP

# Namespace prefixes for RDF serialization
RESOURCE_NS = Namespace('http://it.dbpedia.org/resource/')
FACT_EXTRACTION_NS = Namespace('http://dbpedia.org/fact-extraction/')
ONTOLOGY_NS = Namespace('http://dbpedia.org/ontology/')
NAMESPACE_MANAGER = NamespaceManager(Graph())
NAMESPACE_MANAGER.bind('resource', RESOURCE_NS)
NAMESPACE_MANAGER.bind('fact', FACT_EXTRACTION_NS)
NAMESPACE_MANAGER.bind('ontology', ONTOLOGY_NS)

NAMESPACES = {
    'ontology': ONTOLOGY_NS,
    'resource': RESOURCE_NS,
    'fact_extraction': FACT_EXTRACTION_NS,
}


def to_assertions(labeled_results,
                  id_to_title,
                  outfile='dataset.nt',
                  score_dataset=None,
                  format='nt'):
    """
    Serialize the labeled results into RDF NTriples

    :param list labeled_results: Data for each sentence. Schema:
turtle.TurtleSerializer.startDocument = turtle_patched_startDocument

# Namespaces in use
from rdflib.namespace import RDF, RDFS, XSD, DCTERMS, FOAF

BIO = Namespace("http://purl.org/vocab/bio/0.1/")
SCHEMA = Namespace("http://schema.org/")
MO = Namespace("http://purl.org/ontology/mo/")
EVENT = Namespace("http://purl.org/NET/c4dm/event.owl#")
TL = Namespace("http://purl.org/NET/c4dm/timeline.owl#")

ns = NamespaceManager(Graph())
# Bind every module-level Namespace under its lower-cased variable name
namespaces = [(n.lower(), globals()[n]) for n in dict(globals())
              if isinstance(globals()[n], Namespace)]
for name, namespace in namespaces:
    ns.bind(name, namespace)


def generate(config, type, data):
    global base
    base = config.BASE_URL
    func_name = 'generate_%s' % type
    if func_name in globals():
        graph = globals()[func_name](config, data)
    else:
        doc = BNode()
        graph = Graph('IOMemory', doc)
    graph.namespace_manager = ns
    return graph
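# A sketch of a handler the globals() dispatch above would pick up; the
# 'person' type and its fields are hypothetical, not from the original module:
from rdflib import Graph, URIRef, Literal

def generate_person(config, data):
    graph = Graph()
    person = URIRef(base + data['slug'])  # 'base' is set by generate() before dispatch
    graph.add((person, RDF.type, FOAF.Person))
    graph.add((person, FOAF.name, Literal(data['name'])))
    return graph

# generate(config, 'person', {'slug': 'alice', 'name': 'Alice'}) now routes
# here via 'generate_%s' % type.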
def main():
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('--stdin', type="choice",
                      choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
                      help='Parse RDF from STDIN (useful for piping) with given format')
    parser.add_option('-x', '--xml', action='append',
                      help='Append to the list of RDF/XML documents to parse')
    parser.add_option('-t', '--trix', action='append',
                      help='Append to the list of TriX documents to parse')
    parser.add_option('-n', '--n3', action='append',
                      help='Append to the list of N3 documents to parse')
    parser.add_option('--nt', action='append',
                      help='Append to the list of NT documents to parse')
    parser.add_option('-a', '--rdfa', action='append',
                      help='Append to the list of RDFa documents to parse')
    parser.add_option('-o', '--output', type="choice",
                      choices=['n3', 'xml', 'pretty-xml', 'TriX', 'turtle', 'nt'],
                      help='Format of the final serialized RDF graph')
    parser.add_option('-m', '--ns', action='append',
                      help='Register a namespace binding (QName prefix to a base URI)')
    parser.add_option('-r', '--rules', action='append',
                      help='Append to the list of fact files to use to perform reasoning')
    parser.add_option('-i', '--inferred',
                      help='URI to use for the graph containing any inferred triples')
    # 'rules' needs an empty-list default too, otherwise len(options.rules)
    # below raises a TypeError when no -r option is given
    parser.set_defaults(xml=[], trix=[], n3=[], nt=[], rdfa=[], ns=[], rules=[],
                        output='n3')
    (options, args) = parser.parse_args()

    store = plugin.get(RDFLIB_STORE, Store)()
    store.open(RDFLIB_CONNECTION)
    namespace_manager = NamespaceManager(Graph())
    for prefixDef in options.ns:
        # Split only on the first '=' so URIs containing '=' survive
        prefix, uri = prefixDef.split('=', 1)
        namespace_manager.bind(prefix, uri, override=False)
    factGraph = ConjunctiveGraph(store)
    for graphRef in options.xml:
        factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef), format='xml')
    for graphRef in options.trix:
        factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef), format='trix')
    for graphRef in options.n3:
        factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef), format='n3')
    for graphRef in options.nt:
        factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef), format='nt')
    for graphRef in options.rdfa:
        factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef), format='rdfa')
    if options.stdin:
        factGraph.parse(sys.stdin, format=options.stdin)

    if options.inferred and len(options.rules) > 0:
        inferredURI = URIRef(options.inferred)
        ruleStore = N3RuleStore()
        ruleGraph = Graph(ruleStore)
        for ruleFile in options.rules:
            ruleGraph.parse(ruleFile, format='n3')
        tokenSet = generateTokenSet(factGraph)
        deltaGraph = Graph(store=factGraph.store, identifier=inferredURI)
        network = ReteNetwork(ruleStore, inferredTarget=deltaGraph)
        network.feedFactsToAdd(tokenSet)

    print factGraph.serialize(destination=None, format=options.output, base=None)
    store.rollback()
def spo(request, doc_id):
    # Create and bind namespaces
    namespace_manager = NamespaceManager(Graph())
    for ns in namespaces_dict:
        namespace_manager.bind(ns, Namespace(namespaces_dict[ns]), override=False)

    # Load graph from the uploaded file
    if type(doc_id) == int or doc_id == "1":
        # If doc_id is an integer, a file was uploaded. If doc_id is the
        # string "1", the example is parsed.
        graph = getrdf(doc_id)
    else:
        # Otherwise doc_id is a string holding a SPARQL endpoint URL
        graph = Graph()
        graph.parse(data=spo2rdfjson(doc_id), format="rdf-json")

    # Generate lists of triples, subjects, predicates and objects
    triple_list = []
    subject_list = []
    predicate_list = []
    object_list = []
    for s, p, o in graph:
        triple_list.append([s, p, o])
        subject_list.append(str(s).encode('utf-8', 'ignore'))
        predicate_list.append(str(p).encode('utf-8', 'ignore'))
        object_list.append(str(o).encode('utf-8', 'ignore'))

    subject_set = json.dumps(list(set(subject_list)))
    predicate_set = json.dumps(list(set(predicate_list)))
    object_set = json.dumps(list(set(object_list)))

    # Determine xml:base
    subject_base_test_set = {triple[0] for triple in triple_list}
    base_set = {subject[:subject.rfind("/")] for subject in subject_base_test_set}
    # If all subjects share the same substring-base, this substring-base is
    # likely to be the xml:base.
    if len(base_set) == 1:
        base = str(list(base_set)[0]) + "/"
    else:
        base = ""

    # Insert namespaces into graph
    graph.namespace_manager = namespace_manager

    triple_fetcher_classes = get_triple_fetcher_classes()

    # Get the config files
    mapping_config = json.loads(open(SINDICE_CONFIG_MAPPING, 'r').read())
    query_config = json.loads(open(SINDICE_CONFIG_QUERY, 'r').read())

    # For each triple fetcher class, keep the larger of the two config sizes
    import_label_lengths = dict()
    for tf_class in triple_fetcher_classes:
        import_label_lengths[tf_class] = max(len(mapping_config[tf_class]),
                                             len(query_config[tf_class]))

    import_config = {"query": query_config,
                     "mapping": mapping_config,
                     "import_label_lengths": import_label_lengths}

    # Re-key the configs by position for the Django template
    import_config_dj = dict()
    for tf_class in triple_fetcher_classes:
        import_config_dj[tf_class] = {"query": dict(), "mapping": dict()}
    for tf_class in import_config["query"]:
        for i, conf in enumerate(import_config["query"][tf_class]):
            import_config_dj[tf_class]["query"][i] = {conf: import_config["query"][tf_class][conf]}
    for tf_class in import_config["mapping"]:
        for i, conf in enumerate(import_config["mapping"][tf_class]):
            import_config_dj[tf_class]["mapping"][i] = {conf: import_config["mapping"][tf_class][conf]}

    # Serialize graph to RDF/JSON
    rdfjson = graph.serialize(None, format="rdf-json")

    return render_to_response(
        'rdfedit/triples.html',
        {'rdfjson': rdfjson, 'triple_list': triple_list,
         'subject_set': subject_set, 'predicate_set': predicate_set,
         'object_set': object_set,
         'namespaces_dict': json.dumps(namespaces_dict),
         'base': base,
         'triple_fetcher_classes': json.dumps(triple_fetcher_classes),
         'import_config': json.dumps(import_config),
         'import_config_dj': import_config_dj},
        context_instance=RequestContext(request)
    )
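# The xml:base heuristic above, verified in isolation (invented subject URIs):
subjects = {'http://example.org/doc/a', 'http://example.org/doc/b'}
base_set = {s[:s.rfind('/')] for s in subjects}
base = str(list(base_set)[0]) + '/' if len(base_set) == 1 else ''
print(base)  # -> http://example.org/doc/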
from rdflib.namespace import Namespace, NamespaceManager
from rdflib import Graph

# Our data namespace
D = Namespace('http://vivo.mydomain.edu/individual/')
# The VIVO namespace
VIVO = Namespace('http://vivoweb.org/ontology/core#')
# The VCARD namespace
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
# The OBO namespace
OBO = Namespace('http://purl.obolibrary.org/obo/')
# The BIBO namespace
BIBO = Namespace('http://purl.org/ontology/bibo/')
# The FOAF namespace
FOAF = Namespace('http://xmlns.com/foaf/0.1/')
# The SKOS namespace
SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')

ns_manager = NamespaceManager(Graph())
ns_manager.bind('d', D)
ns_manager.bind('vivo', VIVO)
ns_manager.bind('vcard', VCARD)
ns_manager.bind('obo', OBO)
ns_manager.bind('bibo', BIBO)
ns_manager.bind('foaf', FOAF)
ns_manager.bind('skos', SKOS)
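# A minimal sketch of these bindings in use (the individual 'n1234' and the
# label are invented):
from rdflib import Graph, Literal
from rdflib.namespace import RDF, RDFS

g = Graph()
g.namespace_manager = ns_manager

person = D['n1234']  # hypothetical VIVO individual
g.add((person, RDF.type, FOAF.Person))
g.add((person, RDFS.label, Literal('Smith, Jane')))
print(g.serialize(format='turtle'))  # uses the d:/foaf: prefixes bound above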
from .resolver import get_URI_for_AILLA, get_URI_for_ANLA, get_URI_for_TLA, get_URI_for_Paradisec, get_URI_for_ELAR

# define general namespaces
QUEST = Namespace("http://zasquest.org/")
QUESTRESOLVER = Namespace("http://zasquest.org/resolver/")
DBPEDIA = Namespace("http://dbpedia.org/ontology/")
WIKIDATA = Namespace("http://www.wikidata.org/entity/")
LGR = Namespace("https://www.eva.mpg.de/lingua/resources/glossing-rules.php/")
LIGT = Namespace("http://purl.org/liodi/ligt/")
FLEX = Namespace("http://example.org/flex/")
NIF = Namespace("http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#")

# define archive namespaces
ELD_NAMESPACE_MANAGER = NamespaceManager(Graph())
ELD_NAMESPACE_MANAGER.bind('dbpedia', DBPEDIA)
ELD_NAMESPACE_MANAGER.bind('wikidata', WIKIDATA)
ELD_NAMESPACE_MANAGER.bind('quest', QUEST)  # for ontology
ELD_NAMESPACE_MANAGER.bind('QUESTRESOLVER', QUESTRESOLVER)  # for the bridge for rewritable URLs
ELD_NAMESPACE_MANAGER.bind("rdfs", RDFS)
ELD_NAMESPACE_MANAGER.bind("dc", DC)
ELD_NAMESPACE_MANAGER.bind("lgr", LGR)
ELD_NAMESPACE_MANAGER.bind("ligt", LIGT)
ELD_NAMESPACE_MANAGER.bind("flex", FLEX)
ELD_NAMESPACE_MANAGER.bind("nif", NIF)

ARCHIVE_NAMESPACES = {
    'paradisec': Namespace("https://catalog.paradisec.org.au/collections/"),
    #'elarcorpus': Namespace("https://lat1.lis.soas.ac.uk/corpora/ELAR/"),
    'elarcorpus': Namespace("https://elar.soas.ac.uk/Record/"),
    'elarfiles': Namespace("https://elar.soas.ac.uk/resources/"),
import rdflib
from rdflib import URIRef, Literal, BNode
from rdflib.namespace import RDF, SKOS, RDFS, OWL, Namespace, NamespaceManager, XSD

BF = Namespace("http://id.loc.gov/ontologies/bibframe/")
BDR = Namespace("http://purl.bdrc.io/resource/")
BDO = Namespace("http://purl.bdrc.io/ontology/core/")
TMP = Namespace("http://purl.bdrc.io/ontology/tmp/")
BDG = Namespace("http://purl.bdrc.io/graph/")
BDA = Namespace("http://purl.bdrc.io/admindata/")
ADM = Namespace("http://purl.bdrc.io/ontology/admin/")
MBBT = Namespace("http://mbingenheimer.net/tools/bibls/")
CBCT_URI = "https://dazangthings.nz/cbc/text/"
CBCT = Namespace(CBCT_URI)

NSM = NamespaceManager(rdflib.Graph())
NSM.bind("bdr", BDR)
NSM.bind("", BDO)
NSM.bind("bdg", BDG)
NSM.bind("bda", BDA)
NSM.bind("adm", ADM)
NSM.bind("skos", SKOS)
NSM.bind("rdf", RDF)
NSM.bind("cbct", CBCT)
NSM.bind("mbbt", MBBT)
NSM.bind("bf", BF)

K_TO_T = {}
K_TO_SKT = {}
T_TO_SKT = {}

with open('input/Taisho-K.csv', newline='') as csvfile:
import sys
import os
import csv
import re

import rdflib
from rdflib import URIRef, Literal, BNode
from rdflib.namespace import RDF, SKOS, Namespace, NamespaceManager, XSD

BDR = Namespace("http://purl.bdrc.io/resource/")
BDO = Namespace("http://purl.bdrc.io/ontology/core/")
BDG = Namespace("http://purl.bdrc.io/graph/")
BDA = Namespace("http://purl.bdrc.io/admindata/")
ADM = Namespace("http://purl.bdrc.io/ontology/admin/")

NSM = NamespaceManager(rdflib.Graph())
NSM.bind("bdr", BDR)
NSM.bind("", BDO)
NSM.bind("bdg", BDG)
NSM.bind("bda", BDA)
NSM.bind("adm", ADM)
NSM.bind("skos", SKOS)


def linestordf(csvlines, graphname):
    """
    Returns an RDF graph or dataset built from the given CSV lines
    """
    curidx = 0
    ds = rdflib.Dataset()
    g = ds.graph(BDG[graphname])
    g.namespace_manager = NSM
    i = 0
import json
import sys
import urllib.parse

from rdflib import Namespace, Graph, RDFS  # , URIRef, BNode
from rdflib.namespace import NamespaceManager, DC  # , FOAF

from resolver import get_URI_for_AILLA, get_URI_for_ANLA, get_URI_for_TLA, get_URI_for_Paradisec, get_URI_for_ELAR

# define general namespaces
#QUEST = Namespace("http://zasquest.org/")
#QUESTRESOLVER = Namespace("http://zasquest.org/resolver/")
WIKIDATA = Namespace("https://www.wikidata.org/wiki/")

# define archive namespaces
NAMESPACE_MANAGER = NamespaceManager(Graph())
NAMESPACE_MANAGER.bind("wikidata", WIKIDATA)
#NAMESPACE_MANAGER.bind("quest", QUEST)  # for ontology
#NAMESPACE_MANAGER.bind(
#    "QUESTRESOLVER", QUESTRESOLVER
#)  # for the bridge for rewritable URLs
NAMESPACE_MANAGER.bind("rdfs", RDFS)
NAMESPACE_MANAGER.bind("dc", DC)

ARCHIVE_NAMESPACES = {
    'paradisec': Namespace("https://catalog.paradisec.org.au/collections/"),
    #'elarcorpus': Namespace("https://lat1.lis.soas.ac.uk/corpora/ELAR/"),
    'elarcorpus': Namespace("https://elar.soas.ac.uk/Record/"),
    'elarfiles': Namespace("https://elar.soas.ac.uk/resources/"),
#
# Sample ontology on which we test grontocrawler
#
# Please pay attention that it does contain some mistakes
#
from rdflib.extras.infixowl import (Class, Property, CastClass, some)
from rdflib import Namespace, Graph, OWL, Literal
from rdflib.namespace import NamespaceManager

ns = Namespace('http://plumdeq.xyz/ontologies/hypothesis/')

ns_manager = NamespaceManager(Graph())
ns_manager.bind('hypo', ns, override=False)
ns_manager.bind('owl', OWL, override=False)

g = Graph()
g.namespace_manager = ns_manager

# ## Main classes
#
con = Class(
    ns.Continuant,
    graph=g,
    comment=Literal('Material entity. Examples: cells, molecules, joints'))
occ = Class(
    ns.Occurent,
    graph=g,
    comment=Literal('Occurring processes, which start and end at some point'))
condition = Class(