def serialize_graph(request, rdfjson, base): editgraph = Graph() editgraph.parse(data=rdfjson, format="rdf-json") namespace_manager = NamespaceManager(Graph()) for ns in namespaces_dict: namespace_manager.bind(ns, Namespace(namespaces_dict[ns]), override=False) editgraph.namespace_manager = namespace_manager if base: """ RDFLib Module to insert the base during serialization is buggy. Manual insertion needed graphxml_string = editgraph.serialize(format="pretty-xml", base=base) """ graphxml_string = editgraph.serialize(format="pretty-xml").decode('utf-8', 'ignore') graphxml_string = graphxml_string.replace('rdf:RDF\n', 'rdf:RDF\n xml:base="' + base +'"\n') # print graphxml_string else: graphxml_string = editgraph.serialize(format="pretty-xml") graphxml_to_db = RDF_XML(rdfxml_string = graphxml_string) graphxml_to_db.save() print graphxml_to_db.id return json.dumps({'message':graphxml_to_db.id})
def serialize(self, add, delete): commit = Namespace("urn:commit:" + str(uuid.uuid1()) + ":") eccrev = Namespace("https://vocab.eccenca.com/revision/") g = ConjunctiveGraph() namespace_manager = NamespaceManager(g) namespace_manager.bind('eccrev', eccrev, override=False) g.add((commit.term(""), RDF.type, eccrev.Commit)) graphUris = set(delete.keys()) | set(add.keys()) for graphUri in graphUris: if (graphUri in delete.keys() and len(delete[graphUri]) > 0) or (graphUri in add.keys() and len(add[graphUri]) > 0): revision = Namespace("urn:revision:" + str(uuid.uuid1()) + ":") g.add((commit.term(""), eccrev.hasRevision, revision.term(""))) g.add((revision.term(""), RDF.type, eccrev.Revision)) if str(graphUri) != 'http://quitdiff.default/': g.add((revision.term(""), eccrev.hasRevisionGraph, graphUri)) if graphUri in delete.keys() and len(delete[graphUri]) > 0: deleteGraphName = revision.term(":delete") g.add((revision.term(""), eccrev.deltaDelete, deleteGraphName)) for triple in delete[graphUri]: g.add(triple + (deleteGraphName,)) if graphUri in add.keys() and len(add[graphUri]) > 0: insertGraphName = revision.term(":insert") g.add((revision.term(""), eccrev.deltaInsert, insertGraphName)) for triple in add[graphUri]: g.add(triple + (insertGraphName,)) return g.serialize(format="trig").decode("utf-8")
def __init__(self,ruleStore,name = None, initialWorkingMemory = None, inferredTarget = None, nsMap = {}, graphVizOutFile=None, dontFinalize=False, goal=None): self.leanCheck = {} self.goal = goal self.nsMap = nsMap self.name = name and name or BNode() self.nodes = {} self.alphaPatternHash = {} self.ruleSet = set() for alphaPattern in xcombine(('1','0'),('1','0'),('1','0')): self.alphaPatternHash[tuple(alphaPattern)] = {} if inferredTarget is None: self.inferredFacts = Graph() namespace_manager = NamespaceManager(self.inferredFacts) for k,v in nsMap.items(): namespace_manager.bind(k, v) self.inferredFacts.namespace_manager = namespace_manager else: self.inferredFacts = inferredTarget self.workingMemory = initialWorkingMemory and initialWorkingMemory or set() self.proofTracers = {} self.terminalNodes = set() self.instantiations = {} start = time.time() self.ruleStore=ruleStore self.justifications = {} self.dischargedBindings = {} if not dontFinalize: self.ruleStore._finalize() self.filteredFacts = Graph() #'Universal truths' for a rule set are rules where the LHS is empty. # Rather than automatically adding them to the working set, alpha nodes are 'notified' # of them, so they can be checked for while performing inter element tests. self.universalTruths = [] from FuXi.Horn.HornRules import Ruleset self.rules=set() self.negRules = set() for rule in Ruleset(n3Rules=self.ruleStore.rules,nsMapping=self.nsMap): import warnings warnings.warn( "Rules in a network should be built *after* construction via "+ " self.buildNetworkClause(HornFromN3(n3graph)) for instance", DeprecationWarning,2) self.buildNetworkFromClause(rule) self.alphaNodes = [node for node in self.nodes.values() if isinstance(node,AlphaNode)] self.alphaBuiltInNodes = [node for node in self.nodes.values() if isinstance(node,BuiltInAlphaNode)] self._setupDefaultRules() if initialWorkingMemory: start = time.time() self.feedFactsToAdd(initialWorkingMemory) print >>sys.stderr,"Time to calculate closure on working memory: %s m seconds"%((time.time() - start) * 1000) if graphVizOutFile: print >>sys.stderr,"Writing out RETE network to ", graphVizOutFile renderNetwork(self,nsMap=nsMap).write(graphVizOutFile)
class NSManager: def __init__(self, ns_dict): """ TODO: check ns_dict """ self._ns_dict = ns_dict self._rdflib_ns_manager = None def __getitem__(self, key): return self._ns_dict[key] def __getattr__(self, key): try: return self._ns_dict[key] except KeyError: raise AttributeError() def add_namespace(self, prefix, namespace): """ TODO: check prefix and namespace """ if self._ns_dict.has_key(prefix): raise AlreadyRegisteredNSError(prefix) self._ns_dict[prefix] = namespace @property def ns_dict(self): return self._ns_dict @property def rdflib_ns_manager(self): """ For using prefixes in RDFlib graphs """ if self._rdflib_ns_manager is None: self._rdflib_ns_manager = NamespaceManager(Graph()) for namesp in self._ns_dict: self._rdflib_ns_manager.bind(namesp, self._ns_dict[namesp]) return self._rdflib_ns_manager
def init_database(self): """ Open the configured database """ self._init_rdf_graph() L.debug("opening " + str(self.source)) try: self.source.open() except OpenFailError as e: L.error('Failed to open the data source because: %s', e) raise nm = NamespaceManager(self['rdf.graph']) self['rdf.namespace_manager'] = nm self['rdf.graph'].namespace_manager = nm # A runtime version number for the graph should update for all changes # to the graph self['rdf.graph.change_counter'] = 0 self['rdf.graph'].store.dispatcher.subscribe(TripleAddedEvent, self._context_changed_handler()) self['rdf.graph'].store.dispatcher.subscribe(TripleRemovedEvent, self._context_changed_handler()) self['rdf.graph']._add = self['rdf.graph'].add self['rdf.graph']._remove = self['rdf.graph'].remove self['rdf.graph'].add = self._my_graph_add self['rdf.graph'].remove = self._my_graph_remove nm.bind("", self['rdf.namespace'])
def load_graph_prefixes(): namespace_manager = NamespaceManager(Graph()) # restPrefix = Namespace('http://restaurants.recommender.es/od-data/restaurant/') # locPrefix = Namespace('http://restaurants.recommender.es/od-data/location/') # ratePrefix = Namespace('http://restaurants.recommender.es/od-data/rate/') # contPrefix = Namespace('http://restaurants.recommender.es/od-data/contact/') # # namespace_manager.bind('rest', restPrefix) # namespace_manager.bind('loc', locPrefix) # namespace_manager.bind('rate', ratePrefix) # namespace_manager.bind('cont', contPrefix) tree = ET.parse('metadata.xml') root = tree.getroot() prefixes = root.find("prefixes") for prefix in prefixes: namespace = Namespace(prefix.find('namespace').text) prefix_name = prefix.get('name') namespace_manager.bind(prefix_name, namespace) return namespace_manager
def canonicalTerm(self, term): if isinstance(term, URIRef): if self.prolog is not None: namespace_manager = NamespaceManager(Graph()) for prefix,uri in self.prolog.prefixBindings.items(): namespace_manager.bind(prefix, uri, override=False) try: prefix,uri,localName = namespace_manager.compute_qname(term) except: return term if prefix not in self.prolog.prefixBindings: return term else: return u':'.join([prefix, localName]) else: return term elif isinstance(term, Literal): return term.n3() elif isinstance(term, BNode): return term.n3() else: assert isinstance(term, Variable) return term.n3()
def __init__(self, graph): self.bind = lambda *a: None # disable the bind method NamespaceManager.__init__(self, graph) del self.bind # restore original bind method self.bind("", str(CLD)) self.bind("ma", MA) self.bind("rdf", RDF) self.bind("cam", CAM_NS_PREFIX)
def _create_or_get_graph(self,name): if name not in self.models: graph = Graph() namespace_manager = NamespaceManager(Graph()) namespace_manager.bind(DEFAULT_NAMESPACE[0], self.default_ns) graph.ns_manager = namespace_manager self.models[name] = graph return self.models[name]
def openDatabase(self): """ Open a the configured database """ self._init_rdf_graph() L.debug("opening " + str(self.source)) self.source.open() nm = NamespaceManager(self['rdf.graph']) self['rdf.namespace_manager'] = nm self['rdf.graph'].namespace_manager = nm nm.bind("", self['rdf.namespace'])
def initgraphconfig(self, rev): """Initialize graph settings. Public method to initalize graph settings. This method will be run only once. """ if self.graphconf is None: self.graphconf = Graph() self.nsMngrGraphconf = NamespaceManager(self.graphconf) self.nsMngrGraphconf.bind('', self.quit, override=False) graph_files, config_files, rdf_files = self.get_blobs_from_repository(rev) if len(graph_files) == 0 and len(config_files) == 0: self.mode = 'graphfiles' elif len(graph_files) > 0 and len(config_files) > 0: raise InvalidConfigurationError( "Conflict. Found graphfiles and QuitStore configuration file.") elif len(graph_files) > 0: self.mode = 'graphfiles' self.__init_graph_conf_with_blobs(graph_files, rev) elif len(config_files) == 1: self.mode = 'configuration' self.__init_graph_conf_from_configuration(config_files[0], rdf_files) else: raise InvalidConfigurationError( "Conflict. Found more than one QuitStore configuration file.")
def __init__(self, text=""): super(Sketch, self).__init__() self.rdfGraph = rdflib.Graph() self.namespace_manager = NamespaceManager(self.rdfGraph) self.SUPPORTED_FORMATS = ['xml', 'n3', 'turtle', 'nt', 'pretty-xml', 'dot'] PREFIXES = [ ("", "http://this.sketch#"), ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"), ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"), ("xml", "http://www.w3.org/XML/1998/namespace"), ("xsd", "http://www.w3.org/2001/XMLSchema#"), ('foaf', "http://xmlns.com/foaf/0.1/"), ("npg", "http://ns.nature.com/terms/"), ("npgg", "http://ns.nature.com/graphs/"), ("npgx", "http://ns.nature.com/extensions/"), ("bibo", "http://purl.org/ontology/bibo/"), ("skos", "http://www.w3.org/2004/02/skos/core#"), ("owl", "http://www.w3.org/2002/07/owl#"), ] for pref in PREFIXES: self.bind(pref) if text: self.add(text)
def rdflib_ns_manager(self): """ For using prefixes in RDFlib graphs """ if self._rdflib_ns_manager is None: self._rdflib_ns_manager = NamespaceManager(Graph()) for namesp in self._ns_dict: self._rdflib_ns_manager.bind(namesp, self._ns_dict[namesp]) return self._rdflib_ns_manager
def main(labeled, wid_title_mapping, processed_out, discarded_out, dataset, format, resource_namespace, fact_namespace, ontology_namespace): # Namespace prefixes for RDF serialization RESOURCE_NS = Namespace(resource_namespace) FACT_EXTRACTION_NS = Namespace(fact_namespace) ONTOLOGY_NS = Namespace(ontology_namespace) NAMESPACE_MANAGER = NamespaceManager(Graph()) NAMESPACE_MANAGER.bind('resource', RESOURCE_NS) NAMESPACE_MANAGER.bind('fact', FACT_EXTRACTION_NS) NAMESPACE_MANAGER.bind('ontology', ONTOLOGY_NS) mapping = json.load(wid_title_mapping) with codecs.open(labeled, 'rb', 'utf8') as f: labeled = json.load(f) processed, discarded = to_assertions(labeled, mapping, NAMESPACE_MANAGER, { 'ontology': ONTOLOGY_NS, 'resource': RESOURCE_NS, 'fact_extraction': FACT_EXTRACTION_NS, }, outfile=dataset, format=format) with codecs.open(processed_out, 'wb', 'utf8') as f: f.writelines('\n'.join(processed)) with codecs.open(discarded_out, 'wb', 'utf8') as f: f.writelines('\n'.join(discarded))
def renderNetwork(network, nsMap={}): """ Takes an instance of a compiled ReteNetwork and a namespace mapping (for constructing QNames for rule pattern terms) and returns a BGL Digraph instance representing the Rete network #(from which GraphViz diagrams can be generated) """ # from FuXi.Rete import BuiltInAlphaNode # from BetaNode import LEFT_MEMORY, RIGHT_MEMORY, LEFT_UNLINKING dot = Dot(graph_type='digraph') namespace_manager = NamespaceManager(Graph()) for prefix, uri in list(nsMap.items()): namespace_manager.bind(prefix, uri, override=False) visitedNodes = {} edges = [] idx = 0 for node in list(network.nodes.values()): if node not in visitedNodes: idx += 1 visitedNodes[node] = generateBGLNode( dot, node, namespace_manager, str(idx)) dot.add_node(visitedNodes[node]) nodeIdxs = {} for node in list(network.nodes.values()): for mem in node.descendentMemory: if not mem: continue bNode = mem.successor for bNode in node.descendentBetaNodes: for idx, otherNode in enumerate([bNode.leftNode, bNode.rightNode]): if node == otherNode and (node, otherNode) not in edges: for i in [node, bNode]: if i not in visitedNodes: idx += 1 nodeIdxs[i] = idx visitedNodes[i] = generateBGLNode( dot, i, namespace_manager, str(idx)) dot.add_node(visitedNodes[i]) edge = Edge(visitedNodes[node], visitedNodes[bNode], label=idx == 0 and 'left' or 'right') dot.add_edge(edge) edges.append((node, bNode)) return dot
def testExpand(self): EX = Namespace("http://example.com/") namespace_manager = NamespaceManager(Graph()) namespace_manager.bind('ex', EX, override=False) self.testGraph.namespace_manager = namespace_manager man = Class(EX.Man) boy = Class(EX.Boy) woman = Class(EX.Woman) girl = Class(EX.Girl) male = Class(EX.Male) female = Class(EX.Female) human = Class(EX.Human) animal = Class(EX.Animal) cat = Class(EX.Cat) dog = Class(EX.Dog) animal = Class(EX.Animal) animal = cat | dog | human human += man human += boy human += woman human += girl male += man male += boy female += woman female += girl testClass = human & ~ female self.assertEquals(repr(testClass), 'ex:Human THAT ( NOT ex:Female )') newtestClass = ComplementExpansion(testClass, debug=True) self.assertTrue(repr(newtestClass) in [ '( ex:Boy or ex:Man )', '( ex:Man or ex:Boy )'], repr(newtestClass)) testClass2 = animal & ~ (male | female) self.assertEquals(repr(testClass2), '( ( ex:Cat or ex:Dog or ex:Human ) and ( not ( ex:Male or ex:Female ) ) )') newtestClass2 = ComplementExpansion(testClass2, debug=True) testClass2Repr = repr(newtestClass2) self.assertTrue(testClass2Repr in [ '( ex:Cat or ex:Dog )', '( ex:Dog or ex:Cat )'], testClass2Repr)
def dataset(self): #pdb.set_trace() if hasattr(self._connection, 'dataset'): return getattr(self._connection, 'dataset') if self.store=='Sleepycat': dataset = Dataset(store=self.store, default_union=True) dataset.open(self.store_path, create = True) else: self.store = Virtuoso(self.connection) #dataset = Dataset(store=self.store, default_union=True) dataset = ConjunctiveGraph(store=self.store,identifier=CENDARI) self.store.connection # force connection setattr(self._connection, 'dataset', dataset) nm = NamespaceManager(dataset) for (prefix, ns) in INIT_NS.iteritems(): nm.bind(prefix, ns) dataset.namespace_manager = nm return dataset
def init_database(self): """ Open the configured database """ self._init_rdf_graph() L.debug("opening " + str(self.source)) self.source.open() nm = NamespaceManager(self['rdf.graph']) self['rdf.namespace_manager'] = nm self['rdf.graph'].namespace_manager = nm # A runtime version number for the graph should update for all changes # to the graph self['rdf.graph.change_counter'] = 0 self['rdf.graph']._add = self['rdf.graph'].add self['rdf.graph']._remove = self['rdf.graph'].remove self['rdf.graph'].add = self._my_graph_add self['rdf.graph'].remove = self._my_graph_remove nm.bind("", self['rdf.namespace'])
def newgraph(request): print request.method # Create and bind namespaces namespace_manager = NamespaceManager(Graph()) for ns in namespaces_dict: namespace_manager.bind(ns, Namespace(namespaces_dict[ns])) # Create a new graph graph = Graph() graph.namespace_manager = namespace_manager triple_list = [] subject_list = [] predicate_list = [] subject_set = {} predicate_set = {} object_set = {} # Determine xml:base subject_base_test_set = {triple[0] for triple in triple_list} base_set = {subject[:subject.rfind("/")] for subject in subject_base_test_set} # If all subjects share the same substring-base, this substring-base is likely to be the xml:base. if len(base_set) == 1: base = str(list(base_set)[0]) + "/" else: base = "" # Serialize graph rdfjson = graph.serialize(None, format="rdf-json") # triple_fetcher_classes = get_triple_fetcher_classes() response = render_to_response('rdfedit/triples.html', {'rdfjson': rdfjson, 'triple_list': triple_list, 'subject_set': subject_set, 'predicate_set': predicate_set, 'object_set': object_set, 'namespaces_dict': json.dumps(namespaces_dict), 'base': base, 'triple_fetcher_classes': triple_fetcher_classes}, context_instance=RequestContext(request)) return response
def load_ontology(): from FuXi.Horn.HornRules import HornFromN3 from FuXi.Rete.Util import generateTokenSet from FuXi.Rete.RuleStore import SetupRuleStore rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True) for rule in HornFromN3('rdfs-rules.n3'): network.buildNetworkFromClause(rule) # for rule in HornFromN3('owl-rules.n3'): # network.buildNetworkFromClause(rule) g = Graph(identifier='http://catalyst-fp8.eu/ontology') npm = NamespaceManager(g) g.namespace_manager = npm for name in ('SIOC','OA','CATALYST','IDEA','IBIS','VOTE','VERSION','ASSEMBL','OWL','RDF', 'OWL', 'RDFS', 'XSD'): npm.bind(name.lower(), globals()[name]) for f in ontology_files: g.parse(join(dirname(__file__), f), format='turtle') network.feedFactsToAdd(generateTokenSet(g)) for n in network.inferredFacts.triples((None, None, None)): g.add(n) return g
def __init__(self, path=None): """ If not path is passed it build a graph in memory. Otherwise, it creates a persistent graph in disk. """ if path is not None: # Create persistent Graph in disk self.path = path self.graph = ConjunctiveGraph('Sleepycat', URIRef("kb4it://")) graph_path = path + SEP + 'kb4it.graph' self.graph.store.open(graph_path) else: # Create Graph in Memory self.graph = ConjunctiveGraph('IOMemory') # Assign namespaces to the Namespace Manager of this graph namespace_manager = NamespaceManager(ConjunctiveGraph()) for ns in NSBINDINGS: namespace_manager.bind(ns, NSBINDINGS[ns]) self.graph.namespace_manager = namespace_manager
class Prologue: """ A class for holding prefixing bindings and base URI information """ def __init__(self): self.base = None self.namespace_manager = NamespaceManager( Graph()) # ns man needs a store def resolvePName(self, prefix, localname): ns = self.namespace_manager.store.namespace(prefix or "") if ns is None: raise Exception('Unknown namespace prefix : %s' % prefix) return URIRef(ns + (localname or "")) def bind(self, prefix, uri): self.namespace_manager.bind(prefix, uri, replace=True) def absolutize(self, iri): """ Apply BASE / PREFIXes to URIs (and to datatypes in Literals) TODO: Move resolving URIs to pre-processing """ if isinstance(iri, CompValue): if iri.name == 'pname': return self.resolvePName(iri.prefix, iri.localname) if iri.name == 'literal': return Literal( iri.string, lang=iri.lang, datatype=self.absolutize(iri.datatype)) elif isinstance(iri, URIRef) and not ':' in iri: return URIRef(iri, base=self.base) return iri
def serialize(self, add, delete): diff = Namespace("http://topbraid.org/diff#") g = ConjunctiveGraph() namespace_manager = NamespaceManager(g) namespace_manager.bind('diff', diff, override=False) namespace_manager.bind('owl', OWL, override=False) graphUris = set(delete.keys()) | set(add.keys()) for graphUri in graphUris: if (graphUri in delete.keys() and len(delete[graphUri]) > 0) or (graphUri in add.keys() and len(add[graphUri]) > 0): changeset = Namespace("urn:diff:" + str(uuid.uuid1())) graphTerm = changeset.term("") if str(graphUri) != 'http://quitdiff.default/': g.add((graphTerm, OWL.imports, graphUri, graphTerm)) g.add((graphTerm, RDF.type, OWL.Ontology, graphTerm)) g.add((graphTerm, OWL.imports, diff.term(""), graphTerm)) if graphUri in delete.keys() and len(delete[graphUri]) > 0: i = 0 for triple in delete[graphUri]: deleteStatementName = BNode() g.add((deleteStatementName, RDF.type, diff.DeletedTripleDiff, graphTerm)) g.add((deleteStatementName, RDF.subject, triple[0], graphTerm)) g.add((deleteStatementName, RDF.predicate, triple[1], graphTerm)) g.add((deleteStatementName, RDF.object, triple[2], graphTerm)) i += 1 if graphUri in add.keys() and len(add[graphUri]) > 0: i = 0 for triple in add[graphUri]: insertGraphName = BNode() g.add((insertGraphName, RDF.type, diff.AddedTripleDiff, graphTerm)) g.add((insertGraphName, RDF.subject, triple[0], graphTerm)) g.add((insertGraphName, RDF.predicate, triple[1], graphTerm)) g.add((insertGraphName, RDF.object, triple[2], graphTerm)) i += 1 return g.serialize(format="trig").decode("utf-8")
class QNameManager(object): def __init__(self,nsDict=None): self.nsDict = nsDict and nsDict or {} self.nsMgr = NamespaceManager(Graph()) self.nsMgr.bind('owl','http://www.w3.org/2002/07/owl#') self.nsMgr.bind('math','http://www.w3.org/2000/10/swap/math#') def bind(self,prefix,namespace): self.nsMgr.bind(prefix,namespace)
def toURIRef(self,manager:NamespaceManager) -> URIRef: """ Convert to URI Reference :param manager: :class:`rdflib.namespace.NamespaceManager` used to resolve namespace :return: A :class:`rdflib.URIRef` """ if ":" not in self.name: return None head,tail=self.name.split(':',1) for prefix,ns in manager.namespaces(): if prefix==head: return ns+tail return URIRef(self.name)
def write_to_turtle_rdf(df, output_file): g = Graph() namespace_manager = NamespaceManager(Graph()) n_geo = Namespace("http://sws.geonames.org/") n_custom_ont = Namespace( "http://vocab.informatik.tuwien.ac.at/VU184.729-2018/e01429253/ontology/" ) n_custom_cls = Namespace( "http://vocab.informatik.tuwien.ac.at/VU184.729-2018/e01429253/class/") n_time = Namespace("http://www.w3.org/2006/time/") namespace_manager.bind('tuwo', n_custom_ont, override=False) namespace_manager.bind('tuwc', n_custom_cls, override=False) namespace_manager.bind('gn', n_geo, override=False) namespace_manager.bind('time', n_time, override=False) g.namespace_manager = namespace_manager # define properties movement_property = n_custom_ont['populationMovement'] orig_country_property = n_custom_ont['countryOfOrigin'] pop_type_property = n_custom_ont['populationType'] year_property = n_time['year'] pop_amount_property = n_custom_ont['peopleAmount'] # add nodes to the graph for index, row in df.iterrows(): # blank node for connection relation_node = BNode() # a GUID is generated # base triple (residence_country, movement, blank_node) g.add((n_geo[str(int(row[0]))], movement_property, relation_node)) # child properties g.add((relation_node, orig_country_property, n_geo[str(int(row[1]))])) g.add((relation_node, pop_type_property, n_custom_cls[row[2]])) g.add((relation_node, year_property, Literal(int(row[3])))) g.add((relation_node, pop_amount_property, Literal(int(row[4])))) # write to output file g.serialize(destination=output_file, format='turtle')
class NIFPrefixes: def __init__(self): self.manager = NamespaceManager(Graph()) self.manager.bind("xsd", XSD) self.manager.bind("itsrdf", ITSRDF) self.manager.bind("nif", NIF) self._XSD = '@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n' self._ITSRDF = '@prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> .\n' self._NIF = '@prefix nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> .\n' @property def turtle(self): return self._XSD + self._ITSRDF + self._NIF
def run_loader(self, data_source=None, version=None, name=None, id=None, extensions=None, verbosity=1): """ load the given ontology file in xml format into the database Keyword Arguments: data_source -- path to the source file version -- version of the ontology being loaded """ if verbosity > 0: print '' print 'Loading Source Ontology: "%s"' % data_source if data_source is not None and version is not None: self.graph = Graph() self.namespace_manager = NamespaceManager(self.graph) self.subclass_cache = {} with transaction.atomic(): ontology = self.add_ontology(id=id, data_source=data_source, version=version, name=name) loaded_extensions = [extension.path.path for extension in models.Ontology.objects.filter(parentontology=ontology)] if extensions is None: extensions = loaded_extensions else: extensions = extensions.split(',') + loaded_extensions for extension in set(extensions): if verbosity > 0: print 'Loading Extension: "%s"' % extension if os.path.isfile(extension): self.add_ontology(data_source=extension, version=version, name=name, parentontology=ontology) else: # delete references to ontolgy files that don't exist on disk models.Ontology.objects.filter(path=self.get_relative_path(extension)).delete() models.OntologyClass.objects.filter(ontology=ontology).delete() for ontology_class, data in self.crawl_graph().iteritems(): models.OntologyClass.objects.update_or_create(source=ontology_class, ontology=ontology, defaults={'target': data})
def serialize(self, add, delete): diff = Namespace("http://topbraid.org/diff#") g = ConjunctiveGraph() namespace_manager = NamespaceManager(g) namespace_manager.bind('diff', diff, override=False) namespace_manager.bind('owl', OWL, override=False) graphUris = set(delete.keys()) | set(add.keys()) for graphUri in graphUris: if (graphUri in delete.keys() and len(delete[graphUri]) > 0) or ( graphUri in add.keys() and len(add[graphUri]) > 0): changeset = Namespace("urn:diff:" + str(uuid.uuid1())) graphTerm = changeset.term("") if str(graphUri) != 'http://quitdiff.default/': g.add((graphTerm, OWL.imports, graphUri, graphTerm)) g.add((graphTerm, RDF.type, OWL.Ontology, graphTerm)) g.add((graphTerm, OWL.imports, diff.term(""), graphTerm)) if graphUri in delete.keys() and len(delete[graphUri]) > 0: i = 0 for triple in delete[graphUri]: deleteStatementName = BNode() g.add((deleteStatementName, RDF.type, diff.DeletedTripleDiff, graphTerm)) g.add((deleteStatementName, RDF.subject, triple[0], graphTerm)) g.add((deleteStatementName, RDF.predicate, triple[1], graphTerm)) g.add((deleteStatementName, RDF.object, triple[2], graphTerm)) i += 1 if graphUri in add.keys() and len(add[graphUri]) > 0: i = 0 for triple in add[graphUri]: insertGraphName = BNode() g.add((insertGraphName, RDF.type, diff.AddedTripleDiff, graphTerm)) g.add((insertGraphName, RDF.subject, triple[0], graphTerm)) g.add((insertGraphName, RDF.predicate, triple[1], graphTerm)) g.add((insertGraphName, RDF.object, triple[2], graphTerm)) i += 1 return g.serialize(format="trig").decode("utf-8")
def _get_str_for_uriref(namespace_manager: NamespaceManager, uriref: URIRef) -> str: """ Reusing NamespaceManager.normalizeUri for transforming Graph to DataFrame. In effect we only need to strip < and > from N3 representation and forget the case of URIRef being a rdflib.term.Variable. Parameters ---------- namespace_manager : rdflib.namespace.NamespaceManager NamespaceManager to use to normalize URIs uriref : rdflib.URIRef URI to normalize Returns ------- str Normalised URI string. """ return re.sub('<|>', '', namespace_manager.normalizeUri(uriref))
def __init__( self, configfile='config.ttl', features=None, upstream=None, targetdir=None, namespace=None, oauthclientid=None, oauthclientsecret=None ): """Initialize store configuration. This method checks if the config file is given and reads the config file. If the config file is missing, it will be generated after analyzing the file structure. """ logger = logging.getLogger('quit.conf.QuitConfiguration') logger.debug('Initializing configuration object.') self.features = features self.configchanged = False self.sysconf = Graph() self.upstream = None self.namespace = None self.oauthclientid = oauthclientid self.oauthclientsecret = oauthclientsecret self.nsMngrSysconf = NamespaceManager(self.sysconf) self.nsMngrSysconf.bind('', self.quit, override=False) try: self.__initstoreconfig( namespace=namespace, upstream=upstream, targetdir=targetdir, configfile=configfile) except InvalidConfigurationError as e: logger.error(e) raise e
def create_molecule_graph(uri_base, mol): mongochem = Namespace('%s/api/v1/molecules/' % uri_base) g = Graph() inchi = mol['inchi'] name = mol.get('name') inchi_node = BNode() molecule = URIRef(mongochem[mol['_id']]) namespace_manager = NamespaceManager(g) namespace_manager.bind('cheminf', cheminf, override=False) namespace_manager.bind('mongochem', mongochem, override=False) namespace_manager.bind('owl', OWL, override=False) g.add((molecule, OWL.subClassOf, cheminf.CHEMINF_000000)) if name is not None: g.add((molecule, OWL.label, Literal(name.lower()))) g.add((inchi_node, RDF.type, cheminf.CHEMINF_000113)) g.add((inchi_node, cheminf.SIO_000300, Literal(inchi))) g.add((molecule, cheminf.CHEMINF_000200, inchi_node)) return g.serialize()
def _render_skos_rdf(self): namespace_manager = NamespaceManager(Graph()) namespace_manager.bind('dct', DCTERMS) namespace_manager.bind('skos', SKOS) concept_g = Graph() concept_g.namespace_manager = namespace_manager for s, p, o in g.triples((URIRef(self.concept.uri), None, None)): concept_g.add((s, p, o)) # serialise in the appropriate RDF format if self.format in ['application/rdf+json', 'application/json']: return Response(concept_g.serialize(format='json-ld'), mimetype=self.format) else: return Response(concept_g.serialize(format=self.format), mimetype=self.format)
def __init__(self, resultpath, harvest_source_id): self.path = resultpath self.g = rdflib.Graph() for k, v in namespaces.items(): self.g.bind(k, v) self.g.namespace_manager = NamespaceManager(self.g) self.harvest_source_id = harvest_source_id self.shaclkeys = [ 'sh_focusnode', 'sh_severity', 'sh_path', 'sh_constraint', 'sh_message', 'sh_value', 'sh_shape', 'sh_detail' ] self.shaclpredicates = [ SHACL.focusNode, SHACL.resultSeverity, SHACL.resultPath, SHACL.sourceConstraintComponent, SHACL.resultMessage, SHACL.value, SHACL.sourceShape, SHACL.resultDetail ] self.resultdictkeys = self.shaclkeys[:] self.resultdictkey_parseerror = 'parseerror' self.resultdictkey_harvestsourceid = 'harvest_source_id' self.resultdictkeys.extend([ self.resultdictkey_harvestsourceid, self.resultdictkey_parseerror ])
def showSelection(): # flash('RDF file successfully created') BMWD = Namespace('https://www.materials.fraunhofer.de/ontologies/BWMD_ontology/mid#') UNIT = Namespace('http://www.ontologyrepository.com/CommonCoreOntologies/Mid/InformationEntityOntology')#http://www.qudt.org/2.1/vocab/unit BS = Namespace('https://w3id.org/def/basicsemantics-owl#') g = Graph() g.namespace_manager = NamespaceManager(Graph()) g.namespace_manager.bind('unit', UNIT) g.namespace_manager.bind('bs', BS) g.namespace_manager.bind('bmwd', BMWD) print('in showSelection method changed---') print(request.form.getlist('dropdown')) print(str(request.form.get("search1"))) # test=request.form.get('dropdown1') for i in range(0, 30): test = request.form.get('dropdown' + str(i)) if test: # filter_data = re.split("\s\s|;|\t", test) filter_data = [i for i in re.split("\s\s|;|\t", test) if i != ''] print(filter_data) if len(filter_data) <= 2: g.add((BMWD[request.form.get("search" + str(i))], BS['hasValue'], Literal(filter_data[-1]))) else: if re.findall('[0-9]+', filter_data[1]): g.add((BMWD[request.form.get("search" + str(i))], UNIT['hasUnit'], Literal(filter_data[2]))) g.add((BMWD[request.form.get("search" + str(i))], BS['hasValue'], Literal(filter_data[1]))) else: g.add((BMWD[request.form.get("search" + str(i))], BS['hasValue'], Literal(filter_data[2]))) g.add((BMWD[request.form.get("search" + str(i))], UNIT['hasUnit'], Literal(filter_data[1]))) # print(len([i for i in b if i != ''])) print(str(request.form.get("search" + str(i)))) return g
def tobj(objname): SCHEMA = Namespace('http://schema.org/') SPDX = Namespace('http://www.spdx.org/rdf/terms#') n = NamespaceManager(Graph()) n.bind("schema", SCHEMA) n.bind("spdx", SPDX) c = get_db().cursor() c.execute('SELECT * FROM objects WHERE id=?', (objname,)) obj = c.fetchone() g = Graph() g.namespace_manager = n objuri = URIRef("http://localhost:5000/b/" + obj[0]) robjuri = URIRef("http://localhost:5000/r/" + obj[0]) md5node = BNode() g.add((md5node, SPDX.checksumValue, Literal(obj[2]))) g.add((md5node, SPDX.algorithm, URIRef("http://packages.qa.debian.org/#checksumAlgorithm_md5sum"))) g.add((objuri, SPDX.checksum, md5node)) g.add((objuri, SCHEMA.fileSize, Literal(obj[1]))) return Response(g.serialize(format="turtle"), mimetype="text/plain")
def test_should_roundtrip_csv_to_graph_to_csv(self): """Should roundtrip DF -> Graph -> DF """ df = pd.read_csv('./tests/csv/test.csv', index_col='@id', keep_default_na=True) namespace_manager = NamespaceManager(Graph()) namespace_manager.bind('skos', SKOS) namespace_manager.bind( 'rdfpandas', Namespace('http://github.com/cadmiumkitty/rdfpandas/')) g = rdfpandas.to_graph(df, namespace_manager) df_result = rdfpandas.to_dataframe(g) pd.testing.assert_frame_equal(df.astype(np.unicode_), df_result.astype(np.unicode_), check_like=True, check_names=False)
def generate_authority_rdf(authority): g = Graph() auth = URIRef("http://data.isiscb.org/authority/" + authority.id) #urllib.quote(authority.name.replace(" ", "_"))) type = get_auth_type(authority.type_controlled) if not type: return '' g.add( (auth, RDF.type, type) ) g.add( (auth, RDF.type, madsrdf.Authority) ) g.add( (auth, RDFS.label, Literal(authority.name)) ) g.add( (auth, madsrdf.authoritativeLabel, Literal(authority.name)) ) for attr in authority.attributes.all(): attr_pred = get_property(attr.type_controlled.name) if attr_pred: g.add( (auth, attr_pred, Literal(attr.value_freeform))) nsMgr = NamespaceManager(g) nsMgr.bind('madsrdf', madsrdf) nsMgr.bind('isiscb', isisns) nsMgr.bind('isisvocab', isisns_props) return g.serialize(format='application/rdf+xml')
def _render_skos_rdf(self): namespace_manager = NamespaceManager(Graph()) namespace_manager.bind("dct", DCTERMS) namespace_manager.bind("skos", SKOS) concept_g = Graph() concept_g.namespace_manager = namespace_manager for s, p, o in self.concept.source.graph.triples( (URIRef(self.concept.uri), None, None) ): concept_g.add((s, p, o)) # serialise in the appropriate RDF format if self.mediatype in ["application/rdf+json", "application/json"]: return Response( concept_g.serialize(format="json-ld"), mimetype=self.mediatype ) else: return Response( concept_g.serialize(format=self.mediatype), mimetype=self.mediatype )
def get_queries (model): '''Returns a list of test sparql queries for the given model''' queries = [] generate_queries (data, queries, str(NS['sp'][model])) return queries # Query builder state variables main_types = [] data = {} loaded = False # Initialize the namespace manager object namespace_manager = NamespaceManager(Graph()) # Import the namespaces into the namespace manager for ns in NS.keys(): namespace_manager.bind(ns, NS[ns], override=False) # Parse the ontology when necessary if not rdf_ontology.api_types: rdf_ontology.parse_ontology(open(APP_PATH + '/data/smart.owl').read()) # Build a list of data types that need to be added to the data definitions for t in rdf_ontology.api_types: if t.is_statement or len(t.calls) > 0 or rdf_ontology.sp.Component in [x.uri for x in t.parents]: main_types.append(t) # Build the data definitions object with each data type
import sys from rdflib import URIRef, Literal, BNode from rdflib.namespace import RDF, SKOS, RDFS, OWL, Namespace, NamespaceManager, XSD BF = Namespace("http://id.loc.gov/ontologies/bibframe/") BDR = Namespace("http://purl.bdrc.io/resource/") BDO = Namespace("http://purl.bdrc.io/ontology/core/") TMP = Namespace("http://purl.bdrc.io/ontology/tmp/") BDG = Namespace("http://purl.bdrc.io/graph/") BDA = Namespace("http://purl.bdrc.io/admindata/") ADM = Namespace("http://purl.bdrc.io/ontology/admin/") MBBT = Namespace("http://mbingenheimer.net/tools/bibls/") CBCT_URI = "https://dazangthings.nz/cbc/text/" CBCT = Namespace(CBCT_URI) NSM = NamespaceManager(rdflib.Graph()) NSM.bind("bdr", BDR) NSM.bind("", BDO) NSM.bind("bdg", BDG) NSM.bind("bda", BDA) NSM.bind("adm", ADM) NSM.bind("skos", SKOS) NSM.bind("rdf", RDF) NSM.bind("cbct", CBCT) NSM.bind("mbbt", MBBT) NSM.bind("bf", BF) K_TO_T = {} K_TO_SKT = {} T_TO_SKT = {}
def _get_namespace_manager(self): if self.__namespace_manager is None: self.__namespace_manager = NamespaceManager(self) return self.__namespace_manager
#=============================================================================== import os #=============================================================================== from rdflib import Graph from rdflib.namespace import Namespace, NamespaceManager import yaml #=============================================================================== with open(os.path.join(os.path.split(__file__)[0], 'curie_map.yaml')) as f: curie_map = yaml.load(f, Loader=yaml.Loader) SCICRUNCH_NS = NamespaceManager(Graph()) _namespaces = {} for prefix, url in curie_map.items(): ns = Namespace(url) SCICRUNCH_NS.bind(prefix, ns, override=True) _namespaces[prefix] = ns #=============================================================================== def namespaces_dict(): return _namespaces #===============================================================================
if self.base: self.write(self.indent() + '@base <%s> .\n' % self.base) if self._spacious: self.write('\n') turtle.TurtleSerializer.startDocument = turtle_patched_startDocument # Namespaces in use from rdflib.namespace import RDF, RDFS, XSD, DCTERMS, FOAF BIO = Namespace("http://purl.org/vocab/bio/0.1/") SCHEMA = Namespace("http://schema.org/") MO = Namespace("http://purl.org/ontology/mo/") EVENT = Namespace("http://purl.org/NET/c4dm/event.owl#") TL = Namespace("http://purl.org/NET/c4dm/timeline.owl#") ns = NamespaceManager(Graph()) namespaces = [(n.lower(), globals()[n]) for n in dict(globals()) if isinstance(globals()[n], Namespace)] for name, namespace in namespaces: ns.bind(name, namespace) def generate(config, type, data): global base base = config.BASE_URL func_name = 'generate_%s' % type if func_name in globals(): graph = globals()['generate_%s' % type](config, data) else: doc = BNode() uri = base
BIBO = Namespace('http://purl.org/ontology/bibo/') FOAF = Namespace('http://xmlns.com/foaf/0.1/') SKOS = Namespace('http://www.w3.org/2004/02/skos/core#') VCARD = Namespace('http://www.w3.org/2006/vcard/ns#') OBO = Namespace('http://purl.obolibrary.org/obo/') CONVERIS = Namespace('http://localhost/ontology/converis#') FHP = Namespace('http://vivo.fredhutch.org/ontology/publications#') FHD = Namespace('http://vivo.fredhutch.org/ontology/display#') #tmp graph for in memory graphs TMP = Namespace('http://localhost/tmp#') namespaces = {} for k, o in vars().items(): if isinstance(o, (Namespace, ClosedNamespace)): namespaces[k] = o ns_mgr = NamespaceManager(Graph()) for k, v in namespaces.items(): ns_mgr.bind(k.lower(), v) rq_prefixes = u"\n".join("prefix %s: <%s>" % (k.lower(), v) for k, v in namespaces.items()) prefixes = u"\n ".join("%s: %s" % (k.lower(), v) for k, v in namespaces.items() if k not in u'RDF RDFS OWL XSD') #namespace setup complete
def get_dpn_instance_graph(dpn_graph): nm = NamespaceManager(dpn_graph) #namespace_manager.bind('dpn', dpn, override=False) g = Graph(namespace_manager = nm) return g
from django.contrib.contenttypes.fields import GenericRelation from django.core.exceptions import ValidationError from rdflib import Graph, BNode from rdflib.collection import Collection from rdflib.namespace import Namespace, NamespaceManager, DC, DCTERMS, RDF, RDFS from rdflib.plugin import register from rdflib.plugins.serializers.rdfxml import XMLLANG, OWL_NS, XMLBASE from rdflib.plugins.serializers.xmlwriter import XMLWriter from rdflib.serializer import Serializer from rdflib.term import Literal, URIRef from rdflib.util import first HSTERMS = Namespace("https://www.hydroshare.org/terms/") RDFS1 = Namespace("http://www.w3.org/2000/01/rdf-schema#") NAMESPACE_MANAGER = NamespaceManager(Graph()) NAMESPACE_MANAGER.bind('hsterms', HSTERMS, override=False) NAMESPACE_MANAGER.bind("rdfs1", RDFS1, override=False) NAMESPACE_MANAGER.bind('dc', DC, override=False) NAMESPACE_MANAGER.bind('dcterms', DCTERMS, override=False) class RDF_MetaData_Mixin(object): """ A mixin for MetaData objects which store their metadata in generic relations. If metadata outside of generic relations need to be used, you may extend ingest_metadata and get_rdf_graph to include the other metadata elements """ def rdf_subject(self): raise NotImplementedError("RDF_Metadata_Mixin implementations must implement rdf_subject")
def from_n3(s, default=None, backend=None, nsm=None): r''' Creates the Identifier corresponding to the given n3 string. >>> from_n3('<http://ex.com/foo>') == URIRef('http://ex.com/foo') True >>> from_n3('"foo"@de') == Literal('foo', lang='de') True >>> from_n3('"""multi\nline\nstring"""@en') == Literal( ... 'multi\nline\nstring', lang='en') True >>> from_n3('42') == Literal(42) True >>> from_n3(Literal(42).n3()) == Literal(42) True >>> from_n3('"42"^^xsd:integer') == Literal(42) True >>> from rdflib import RDFS >>> from_n3('rdfs:label') == RDFS['label'] True >>> nsm = NamespaceManager(Graph()) >>> nsm.bind('dbpedia', 'http://dbpedia.org/resource/') >>> berlin = URIRef('http://dbpedia.org/resource/Berlin') >>> from_n3('dbpedia:Berlin', nsm=nsm) == berlin True ''' if not s: return default if s.startswith('<'): # Hack: this should correctly handle strings with either native unicode # characters, or \u1234 unicode escapes. return URIRef( s[1:-1].encode("raw-unicode-escape").decode("unicode-escape")) elif s.startswith('"'): if s.startswith('"""'): quotes = '"""' else: quotes = '"' value, rest = s.rsplit(quotes, 1) value = value[len(quotes):] # strip leading quotes datatype = None language = None # as a given datatype overrules lang-tag check for it first dtoffset = rest.rfind('^^') if dtoffset >= 0: # found a datatype # datatype has to come after lang-tag so ignore everything before # see: http://www.w3.org/TR/2011/WD-turtle-20110809/ # #prod-turtle2-RDFLiteral datatype = from_n3(rest[dtoffset + 2:], default, backend, nsm) else: if rest.startswith("@"): language = rest[1:] # strip leading at sign value = value.replace(r'\"', '"') # Hack: this should correctly handle strings with either native unicode # characters, or \u1234 unicode escapes. value = value.encode("raw-unicode-escape").decode("unicode-escape") return Literal(value, language, datatype) elif s == 'true' or s == 'false': return Literal(s == 'true') elif s.isdigit(): return Literal(int(s)) elif s.startswith('{'): identifier = from_n3(s[1:-1]) return QuotedGraph(backend, identifier) elif s.startswith('['): identifier = from_n3(s[1:-1]) return Graph(backend, identifier) elif s.startswith("_:"): return BNode(s[2:]) elif ':' in s: if nsm is None: # instantiate default NamespaceManager and rely on its defaults nsm = NamespaceManager(Graph()) prefix, last_part = s.split(':', 1) ns = dict(nsm.namespaces())[prefix] return Namespace(ns)[last_part] else: return BNode(s)
from rdflib.namespace import NamespaceManager, DC #, FOAF from .resolver import get_URI_for_AILLA, get_URI_for_ANLA, get_URI_for_TLA, get_URI_for_Paradisec, get_URI_for_ELAR #define general namespaces QUEST = Namespace("http://zasquest.org/") QUESTRESOLVER = Namespace("http://zasquest.org/resolver/") DBPEDIA = Namespace("http://dbpedia.org/ontology/") WIKIDATA = Namespace("http://www.wikidata.org/entity/") LGR = Namespace("https://www.eva.mpg.de/lingua/resources/glossing-rules.php/") LIGT = Namespace("http://purl.org/liodi/ligt/") FLEX = Namespace("http://example.org/flex/") NIF = Namespace("http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#") #define archive namespaces ELD_NAMESPACE_MANAGER = NamespaceManager(Graph()) ELD_NAMESPACE_MANAGER.bind('dbpedia', DBPEDIA) ELD_NAMESPACE_MANAGER.bind('wikidata', WIKIDATA) ELD_NAMESPACE_MANAGER.bind('quest', QUEST) #for ontology ELD_NAMESPACE_MANAGER.bind('QUESTRESOLVER', QUESTRESOLVER) #for the bridge for rewritable URLs ELD_NAMESPACE_MANAGER.bind("rdfs", RDFS) ELD_NAMESPACE_MANAGER.bind("dc", DC) ELD_NAMESPACE_MANAGER.bind("lgr", LGR) ELD_NAMESPACE_MANAGER.bind("ligt", LIGT) ELD_NAMESPACE_MANAGER.bind("flex", FLEX) ELD_NAMESPACE_MANAGER.bind("nif", NIF) ARCHIVE_NAMESPACES = { 'paradisec': Namespace("https://catalog.paradisec.org.au/collections/"), #'elarcorpus': Namespace("https://lat1.lis.soas.ac.uk/corpora/ELAR/"), 'elarcorpus': Namespace("https://elar.soas.ac.uk/Record/"),
import json from rdflib import URIRef, BNode, Literal, Graph, Namespace from rdflib.namespace import RDF, FOAF, OWL, RDFS, NamespaceManager from rdflib.extras.infixowl import Restriction, Individual import pandas as pd import numpy as np from urllib.request import urlopen, quote import SemanticProcessor.mappings as mappings import SemanticProcessor.concepts as concepts chronicals = Namespace(concepts.BASE_URL) namespace_manager = NamespaceManager(Graph()) def decode(g): query = """SELECT ?headache ?duration ?characterisation ?intensity ?location ?prev_attacks ?diagnosis WHERE { ?headache rdf:type ?headache_type . ?headache ?duration_predicate ?duration . ?headache ?characterisation_predicate ?characterisation . ?headache ?intensity_predicate ?intensity . ?headache ?location_predicate ?location . ?headache ?prev_atk_predicate ?prev_attacks . ?headache ?diagnosis_predicate ?diagnosis . }""" symptom_query = """SELECT ?symptom WHERE{ ?headache ?symptom_predicate ?symptom . } """ qres = g.query(query,
SKOS = Namespace('http://www.w3.org/2004/02/skos/core#') #RDF namespace RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') #CiTO namespace CITO = Namespace('http://purl.org/spar/cito/') #RDFS RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#') #LThe local namespace VLOCAL = Namespace('http://connect.unavco.org/ontology/vlocal#') #WGS84 namespace WGS84 = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#') #OWL namespace OWL = Namespace('http://www.w3.org/2002/07/owl#') VITROPUBLIC = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/public#') ns_manager = NamespaceManager(Graph()) ns_manager.bind('d', D) ns_manager.bind('vivo', VIVO) ns_manager.bind('vcard', VCARD) ns_manager.bind('obo', OBO) ns_manager.bind('bibo', BIBO) ns_manager.bind("foaf", FOAF) ns_manager.bind("skos", SKOS) ns_manager.bind("cito", CITO) ns_manager.bind("rdfs", RDFS) ns_manager.bind("vlocal", VLOCAL) ns_manager.bind("wgs84", WGS84) ns_manager.bind("vitropublic", VITROPUBLIC) ns_manager.bind("owl", OWL)
import sys import os import csv import re import rdflib from rdflib import URIRef, Literal, BNode from rdflib.namespace import RDF, SKOS, Namespace, NamespaceManager, XSD BDR = Namespace("http://purl.bdrc.io/resource/") BDO = Namespace("http://purl.bdrc.io/ontology/core/") BDG = Namespace("http://purl.bdrc.io/graph/") BDA = Namespace("http://purl.bdrc.io/admindata/") ADM = Namespace("http://purl.bdrc.io/ontology/admin/") NSM = NamespaceManager(rdflib.Graph()) NSM.bind("bdr", BDR) NSM.bind("", BDO) NSM.bind("bdg", BDG) NSM.bind("bda", BDA) NSM.bind("adm", ADM) NSM.bind("skos", SKOS) def linestordf(csvlines, graphname): """ Returns an RDF graph or dataset from a yaml object """ curidx = 0 ds = rdflib.Dataset() g = ds.graph(BDG[graphname]) g.namespace_manager = NSM
def __init__(self, model_uri, sparql_wrapper=None, threshold=0.3, include_body: bool = False, resolve: bool = True, use_caching: bool = False): self._graph = rdflib.Graph() self.thesoz = SkosThesaurusMatcher( self._graph, thesaurus_path="claimskg/data/thesoz-komplett.xml", skos_xl_labels=True, prefix="http://lod.gesis.org/thesoz/") self._graph = self.thesoz.get_merged_graph() self.unesco = SkosThesaurusMatcher( self._graph, thesaurus_path="claimskg/data/unesco-thesaurus.xml", skos_xl_labels=False, prefix="http://vocabularies.unesco.org/thesaurus/") self._graph = self.unesco.get_merged_graph() self._graph.load("claimskg/data/dbpedia_categories_lang_en_skos.ttl", format="turtle") self._sparql_wrapper = sparql_wrapper # type: SPARQLWrapper self._uri_generator = ClaimsKGURIGenerator(model_uri) self._threshold = threshold self._include_body = include_body self._resolve = resolve self._use_caching = use_caching self.model_uri = model_uri self._namespace_manager = NamespaceManager(Graph()) self._claimskg_prefix = rdflib.Namespace(model_uri) self._namespace_manager.bind('claimskg', self._claimskg_prefix, override=False) self._namespace_manager.bind('base', self._claimskg_prefix, override=True) self.counter = TypedCounter() self._rdfs_prefix = rdflib.Namespace( "http://www.w3.org/2000/01/rdf-schema#") self._namespace_manager.bind('rdfs', self._rdfs_prefix, override=False) self._schema_prefix = rdflib.Namespace("http://schema.org/") self._namespace_manager.bind('schema', self._schema_prefix, override=False) self._namespace_manager.bind('owl', OWL, override=True) self._dbo_prefix = rdflib.Namespace("http://dbpedia.org/ontology/") self._namespace_manager.bind("dbo", self._dbo_prefix, override=False) self._dbr_prefix = rdflib.Namespace("http://dbpedia.org/resource/") self._namespace_manager.bind("dbr", self._dbr_prefix, override=False) self._dbc_prefix = rdflib.Namespace( "http://dbpedia.org/resource/Category_") self._namespace_manager.bind("dbc", self._dbr_prefix, override=False) self._dcat_prefix = rdflib.Namespace("http://www.w3.org/ns/dcat#") self._namespace_manager.bind("dcat", self._dcat_prefix, override=False) self._dct_prefix = rdflib.Namespace("http://purl.org/dc/terms/") self._namespace_manager.bind("dct", self._dct_prefix, override=False) self._foaf_prefix = rdflib.Namespace("http://xmlns.com/foaf/0.1/") self._namespace_manager.bind("foaf", self._foaf_prefix, override=False) self._vcard_prefix = rdflib.Namespace( "http://www.w3.org/2006/vcard/ns#") self._namespace_manager.bind("vcard", self._vcard_prefix, override=False) self._adms_prefix = Namespace("http://www.w3.org/ns/adms#") self._namespace_manager.bind("adms", self._adms_prefix, override=False) self._skos_prefix = Namespace("http://www.w3.org/2004/02/skos/core#") self._namespace_manager.bind("skos", self._skos_prefix, override=False) self._owl_same_as = URIRef(OWL['sameAs']) self._schema_claim_review_class_uri = URIRef( self._schema_prefix['ClaimReview']) self._schema_creative_work_class_uri = URIRef( self._schema_prefix['CreativeWork']) self._schema_organization_class_uri = URIRef( self._schema_prefix['Organization']) self._schema_thing_class_uri = URIRef(self._schema_prefix['Thing']) self._schema_rating_class_uri = URIRef(self._schema_prefix['Rating']) self._schema_language_class_uri = URIRef( self._schema_prefix['Language']) self._schema_claim_reviewed_property_uri = URIRef( self._schema_prefix['claimReviewed']) self._schema_url_property_uri = URIRef(self._schema_prefix['url']) self._schema_name_property_uri = URIRef(self._schema_prefix['name']) self._schema_date_published_property_uri = URIRef( self._schema_prefix['datePublished']) self._schema_in_language_preperty_uri = URIRef( self._schema_prefix['inLanguage']) self._schema_author_property_uri = URIRef( self._schema_prefix['author']) self._schema_same_as_property_uri = URIRef( self._schema_prefix['sameAs']) self._schema_citation_preperty_uri = URIRef( self._schema_prefix['citation']) self._schema_item_reviewed_property_uri = URIRef( self._schema_prefix['itemReviewed']) self._schema_alternate_name_property_uri = URIRef( self._schema_prefix['alternateName']) self._schema_description_property_uri = URIRef( self._schema_prefix['description']) self._schema_rating_value_property_uri = URIRef( self._schema_prefix['ratingValue']) self._schema_mentions_property_uri = URIRef( self._schema_prefix['mentions']) self._schema_keywords_property_uri = URIRef( self._schema_prefix['keywords']) self._schema_headline_property_uri = URIRef( self._schema_prefix['headline']) self._schema_review_body_property_uri = URIRef( self._schema_prefix['reviewBody']) self._schema_text_property_uri = URIRef(self._schema_prefix['text']) self._iso1_language_tag = "en" self._iso3_language_tag = "eng" self._english_uri = URIRef(self._claimskg_prefix["language/English"]) self._graph.add( (self._english_uri, RDF.type, self._schema_language_class_uri)) self._graph.add( (self._english_uri, self._schema_alternate_name_property_uri, Literal(self._iso1_language_tag))) self._graph.add((self._english_uri, self._schema_name_property_uri, Literal("English"))) self._nif_prefix = rdflib.Namespace( "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#") self._namespace_manager.bind('nif', self._nif_prefix, override=False) self._nif_RFC5147String_class_uri = URIRef( self._nif_prefix['RFC5147String']) self._nif_context_class_uri = URIRef(self._nif_prefix['Context']) self._nif_source_url_property_uri = URIRef( self._nif_prefix['sourceUrl']) self._nif_begin_index_property_uri = URIRef( self._nif_prefix["beginIndex"]) self._nif_end_index_property_uri = URIRef(self._nif_prefix["endIndex"]) self._nif_is_string_property_uri = URIRef(self._nif_prefix["isString"]) self._its_prefix = rdflib.Namespace( "https://www.w3.org/2005/11/its/rdf#") self._namespace_manager.bind('itsrdf', self._its_prefix, override=False) self.its_ta_confidence_property_uri = URIRef( self._its_prefix['taConfidence']) self.its_ta_ident_ref_property_uri = URIRef( self._its_prefix['taIdentRef']) self._logical_view_claims = [] # type: List[ClaimLogicalView] self._creative_works_index = [] self.keyword_uri_set = set() self.global_statistics = ClaimsKGStatistics() self.per_source_statistics = {}
class ClaimsKGGenerator: def __init__(self, model_uri, sparql_wrapper=None, threshold=0.3, include_body: bool = False, resolve: bool = True, use_caching: bool = False): self._graph = rdflib.Graph() self.thesoz = SkosThesaurusMatcher( self._graph, thesaurus_path="claimskg/data/thesoz-komplett.xml", skos_xl_labels=True, prefix="http://lod.gesis.org/thesoz/") self._graph = self.thesoz.get_merged_graph() self.unesco = SkosThesaurusMatcher( self._graph, thesaurus_path="claimskg/data/unesco-thesaurus.xml", skos_xl_labels=False, prefix="http://vocabularies.unesco.org/thesaurus/") self._graph = self.unesco.get_merged_graph() self._graph.load("claimskg/data/dbpedia_categories_lang_en_skos.ttl", format="turtle") self._sparql_wrapper = sparql_wrapper # type: SPARQLWrapper self._uri_generator = ClaimsKGURIGenerator(model_uri) self._threshold = threshold self._include_body = include_body self._resolve = resolve self._use_caching = use_caching self.model_uri = model_uri self._namespace_manager = NamespaceManager(Graph()) self._claimskg_prefix = rdflib.Namespace(model_uri) self._namespace_manager.bind('claimskg', self._claimskg_prefix, override=False) self._namespace_manager.bind('base', self._claimskg_prefix, override=True) self.counter = TypedCounter() self._rdfs_prefix = rdflib.Namespace( "http://www.w3.org/2000/01/rdf-schema#") self._namespace_manager.bind('rdfs', self._rdfs_prefix, override=False) self._schema_prefix = rdflib.Namespace("http://schema.org/") self._namespace_manager.bind('schema', self._schema_prefix, override=False) self._namespace_manager.bind('owl', OWL, override=True) self._dbo_prefix = rdflib.Namespace("http://dbpedia.org/ontology/") self._namespace_manager.bind("dbo", self._dbo_prefix, override=False) self._dbr_prefix = rdflib.Namespace("http://dbpedia.org/resource/") self._namespace_manager.bind("dbr", self._dbr_prefix, override=False) self._dbc_prefix = rdflib.Namespace( "http://dbpedia.org/resource/Category_") self._namespace_manager.bind("dbc", self._dbr_prefix, override=False) self._dcat_prefix = rdflib.Namespace("http://www.w3.org/ns/dcat#") self._namespace_manager.bind("dcat", self._dcat_prefix, override=False) self._dct_prefix = rdflib.Namespace("http://purl.org/dc/terms/") self._namespace_manager.bind("dct", self._dct_prefix, override=False) self._foaf_prefix = rdflib.Namespace("http://xmlns.com/foaf/0.1/") self._namespace_manager.bind("foaf", self._foaf_prefix, override=False) self._vcard_prefix = rdflib.Namespace( "http://www.w3.org/2006/vcard/ns#") self._namespace_manager.bind("vcard", self._vcard_prefix, override=False) self._adms_prefix = Namespace("http://www.w3.org/ns/adms#") self._namespace_manager.bind("adms", self._adms_prefix, override=False) self._skos_prefix = Namespace("http://www.w3.org/2004/02/skos/core#") self._namespace_manager.bind("skos", self._skos_prefix, override=False) self._owl_same_as = URIRef(OWL['sameAs']) self._schema_claim_review_class_uri = URIRef( self._schema_prefix['ClaimReview']) self._schema_creative_work_class_uri = URIRef( self._schema_prefix['CreativeWork']) self._schema_organization_class_uri = URIRef( self._schema_prefix['Organization']) self._schema_thing_class_uri = URIRef(self._schema_prefix['Thing']) self._schema_rating_class_uri = URIRef(self._schema_prefix['Rating']) self._schema_language_class_uri = URIRef( self._schema_prefix['Language']) self._schema_claim_reviewed_property_uri = URIRef( self._schema_prefix['claimReviewed']) self._schema_url_property_uri = URIRef(self._schema_prefix['url']) self._schema_name_property_uri = URIRef(self._schema_prefix['name']) self._schema_date_published_property_uri = URIRef( self._schema_prefix['datePublished']) self._schema_in_language_preperty_uri = URIRef( self._schema_prefix['inLanguage']) self._schema_author_property_uri = URIRef( self._schema_prefix['author']) self._schema_same_as_property_uri = URIRef( self._schema_prefix['sameAs']) self._schema_citation_preperty_uri = URIRef( self._schema_prefix['citation']) self._schema_item_reviewed_property_uri = URIRef( self._schema_prefix['itemReviewed']) self._schema_alternate_name_property_uri = URIRef( self._schema_prefix['alternateName']) self._schema_description_property_uri = URIRef( self._schema_prefix['description']) self._schema_rating_value_property_uri = URIRef( self._schema_prefix['ratingValue']) self._schema_mentions_property_uri = URIRef( self._schema_prefix['mentions']) self._schema_keywords_property_uri = URIRef( self._schema_prefix['keywords']) self._schema_headline_property_uri = URIRef( self._schema_prefix['headline']) self._schema_review_body_property_uri = URIRef( self._schema_prefix['reviewBody']) self._schema_text_property_uri = URIRef(self._schema_prefix['text']) self._iso1_language_tag = "en" self._iso3_language_tag = "eng" self._english_uri = URIRef(self._claimskg_prefix["language/English"]) self._graph.add( (self._english_uri, RDF.type, self._schema_language_class_uri)) self._graph.add( (self._english_uri, self._schema_alternate_name_property_uri, Literal(self._iso1_language_tag))) self._graph.add((self._english_uri, self._schema_name_property_uri, Literal("English"))) self._nif_prefix = rdflib.Namespace( "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#") self._namespace_manager.bind('nif', self._nif_prefix, override=False) self._nif_RFC5147String_class_uri = URIRef( self._nif_prefix['RFC5147String']) self._nif_context_class_uri = URIRef(self._nif_prefix['Context']) self._nif_source_url_property_uri = URIRef( self._nif_prefix['sourceUrl']) self._nif_begin_index_property_uri = URIRef( self._nif_prefix["beginIndex"]) self._nif_end_index_property_uri = URIRef(self._nif_prefix["endIndex"]) self._nif_is_string_property_uri = URIRef(self._nif_prefix["isString"]) self._its_prefix = rdflib.Namespace( "https://www.w3.org/2005/11/its/rdf#") self._namespace_manager.bind('itsrdf', self._its_prefix, override=False) self.its_ta_confidence_property_uri = URIRef( self._its_prefix['taConfidence']) self.its_ta_ident_ref_property_uri = URIRef( self._its_prefix['taIdentRef']) self._logical_view_claims = [] # type: List[ClaimLogicalView] self._creative_works_index = [] self.keyword_uri_set = set() self.global_statistics = ClaimsKGStatistics() self.per_source_statistics = {} def _create_schema_claim_review(self, row, claim: ClaimLogicalView): claim_review_instance = self._uri_generator.claim_review_uri(row) self._graph.add((claim_review_instance, RDF.type, self._schema_claim_review_class_uri)) # claim_reviewed_value = _normalize_text_fragment(_row_string_value(row, "claimReview_claimReviewed")) # self._graph.add( # (claim_review_instance, self._schema_claim_reviewed_property_uri, # Literal(claim_reviewed_value, # lang=self._iso1_language_tag))) headline_value = _row_string_value(row, "extra_title") if len(headline_value) > 0: self._graph.add( (claim_review_instance, self._schema_headline_property_uri, Literal(headline_value, lang=self._iso1_language_tag))) claim.text_fragments.append(headline_value) claim.has_headline = True # Include body only if the option is enabled body_value = _row_string_value(row, "extra_body") if len(body_value) > 0: claim.has_body_text = True claim.text_fragments.append(_normalize_text_fragment(body_value)) if self._include_body: self._graph.add((claim_review_instance, self._schema_review_body_property_uri, Literal(body_value, lang=self._iso1_language_tag))) claim_review_url = row['claimReview_url'] claim.claim_review_url = claim_review_url if claim_review_url is not None: self._graph.add( (claim_review_instance, self._schema_url_property_uri, URIRef(row['claimReview_url']))) review_date = row['claimReview_datePublished'] if review_date: self._graph.add((claim_review_instance, self._schema_date_published_property_uri, Literal(review_date, datatype=XSD.date))) claim.review_date = datetime.datetime.strptime( review_date, "%Y-%m-%d").date() self._graph.add( (claim_review_instance, self._schema_in_language_preperty_uri, self._english_uri)) return claim_review_instance def _create_organization(self, row, claim): organization = self._uri_generator.organization_uri(row) self._graph.add( (organization, RDF.type, self._schema_organization_class_uri)) claim.claimreview_author = row['claimReview_author_name'] self._graph.add((organization, self._schema_name_property_uri, Literal(row['claimReview_author_name'], lang=self._iso1_language_tag))) author_name = _row_string_value(row, 'claimReview_author_name') if len(author_name) > 0: self._graph.add((organization, self._schema_url_property_uri, URIRef(source_uri_dict[author_name]))) return organization def _create_claims_kg_organization(self): organization = self._uri_generator.claimskg_organization_uri() self._graph.add( (organization, RDF.type, self._schema_organization_class_uri)) self._graph.add((organization, self._schema_name_property_uri, Literal("ClaimsKG"))) self._graph.add((organization, self._schema_url_property_uri, URIRef(self.model_uri))) def _reconcile_keyword_annotations(self, claim, keyword_uri, keyword, matching_annotations, type="thesoz"): for annotation in matching_annotations: self._graph.add((keyword_uri, URIRef(self._dct_prefix["about"]), URIRef(annotation[0]))) if type == "thesoz": claim.keywords_thesoz.add(keyword) else: claim.keywords_unesco.add(keyword) def _reconcile_keyword_mention_with_annotations(self, claim, mention, dbpedia_entity, keyword, matching_annotations, type="thesoz"): start = mention['begin'] end = mention['end'] for matching_annotation in matching_annotations: if start == matching_annotation[2] and end == matching_annotation[ 3]: if type == "thesoz": claim.keywords_thesoz_dbpedia.add(keyword) elif type == "unesco": claim.keywords_unesco_dbpedia.add(keyword) self._graph.add((URIRef(dbpedia_entity), OWL.sameAs, URIRef(matching_annotation[0]))) def _create_creative_work(self, row, claim: ClaimLogicalView): creative_work = self._uri_generator.creative_work_uri(row) self._graph.add( (creative_work, RDF.type, self._schema_creative_work_class_uri)) date_published_value = _row_string_value(row, "creativeWork_datePublished") if len(date_published_value) > 0: self._graph.add( (creative_work, self._schema_date_published_property_uri, Literal(date_published_value, datatype=XSD.date))) claim.claim_date = datetime.datetime.strptime( date_published_value, "%Y-%m-%d").date() keywords = row['extra_tags'] if isinstance(keywords, str) and len(keywords) > 0: keyword_mentions = self._process_json( row['extra_entities_keywords']) if not keyword_mentions: keyword_mentions = [] if ";" in keywords: keyword_list = keywords.split(";") else: keyword_list = keywords.split(",") for keyword in keyword_list: keyword = keyword.strip() keyword_uri = self._uri_generator.keyword_uri(keyword) if keyword_uri not in self.keyword_uri_set: self._graph.add( (keyword_uri, RDF.type, self._schema_thing_class_uri)) self._graph.add( (keyword_uri, self._schema_name_property_uri, Literal(keyword, lang=self._iso1_language_tag))) thesoz_matching_annotations = self.thesoz.find_keyword_matches( keyword) unesco_matching_annotations = self.unesco.find_keyword_matches( keyword) self._reconcile_keyword_annotations( claim, keyword_uri, keyword, thesoz_matching_annotations) self._reconcile_keyword_annotations( claim, keyword_uri, keyword, unesco_matching_annotations, type="unesco") for mention in keyword_mentions: if keyword.lower().strip() in mention['text'].lower( ).strip(): self.keyword_uri_set.add(keyword_uri) mention_instance, dbpedia_entity = self._create_mention( mention, claim, False) if mention_instance: claim.keywords_dbpedia.add(keyword) self._graph.add( (keyword_uri, self._schema_mentions_property_uri, mention_instance)) self._reconcile_keyword_mention_with_annotations( claim, mention, dbpedia_entity, keyword, thesoz_matching_annotations) self._reconcile_keyword_mention_with_annotations( claim, mention, dbpedia_entity, keyword, unesco_matching_annotations, type="unesco") claim.keywords.add(keyword.strip()) self._graph.add( (creative_work, self._schema_keywords_property_uri, keyword_uri)) links = row['extra_refered_links'] author_url = _row_string_value(row, 'claimReview_author_url') if links: links = links[1:-1].split(",") for link in links: stripped_link = link.strip() if len(stripped_link) > 0 and stripped_link[0] != "#" and re.match(_is_valid_url_regex, link.strip()) and link.strip() != \ source_uri_dict[ author_url]: link = link.strip().replace("\\", "").replace( "%20TARGET=prayer>adultery</A>%20was%20made%20public.%20</p>%0A", "").replace("\"", "").replace("<img%20src=?", "").replace( ">", "").replace("</", "").replace("<", "") parsed_url = urlparse(link) is_correct = (all([ parsed_url.scheme, parsed_url.netloc, parsed_url.path ]) and len(parsed_url.netloc.split(".")) > 1 and "<img" not in link) if is_correct: claim.links.append(link) # try: self._graph.add( (creative_work, self._schema_citation_preperty_uri, URIRef(parsed_url.scheme + "://" + parsed_url.netloc + parsed_url.path + "?" + parsed_url.query.replace("|", "%7C"). replace("^", "%5E").replace("\\", "%5C"). replace("{", "%7B").replace("}", "%7D"). replace("&", "%26").replace("=", "%3D")))) # except : # pass # Creative work author instantiation author_value = _row_string_value(row, "creativeWork_author_name") claim.creative_work_author = author_value claim_reviewed_value = _normalize_text_fragment( _row_string_value(row, "claimReview_claimReviewed")) claim.title = claim_reviewed_value self._graph.add((creative_work, self._schema_text_property_uri, Literal(claim_reviewed_value, lang=self._iso1_language_tag))) if len(author_value) > 0: creative_work_author = self._uri_generator.creative_work_author_uri( row) self._graph.add( (creative_work_author, RDF.type, self._schema_thing_class_uri)) author_mentions = self._process_json(row['extra_entities_author']) if not author_mentions: author_mentions = [] for mention in author_mentions: entity_uri = mention['entity'].replace(" ", "_") mention_instance = self._dbr_prefix[entity_uri] if mention_instance: self._graph.add( (creative_work_author, self._schema_mentions_property_uri, mention_instance)) self._graph.add( (creative_work_author, self._schema_name_property_uri, Literal(author_value, lang=self._iso1_language_tag))) self._graph.add((creative_work, self._schema_author_property_uri, creative_work_author)) # Todo: Reconcile author entities with DBPedia # self._graph.add((creative_work_author, self._schema_same_as_property_uri, Literal("dbpedia:link"))) self._creative_works_index.append(creative_work) return creative_work def _create_review_rating(self, row, claim): original_rating = self._uri_generator.create_original_rating_uri(row) rating_alternate_name = row['rating_alternateName'] if rating_alternate_name: escaped_alternate_rating_name = html.escape( row['rating_alternateName']).encode('ascii', 'xmlcharrefreplace') self._graph.add( (original_rating, self._schema_alternate_name_property_uri, Literal(escaped_alternate_rating_name))) self._graph.add( (original_rating, RDF.type, self._schema_rating_class_uri)) rating_value = row['rating_ratingValue'].replace("[", "").replace( "]", "").replace("'", "").replace(",", "").strip() if rating_value and len(rating_value) > 0: value = float(rating_value) self._graph.add( (original_rating, self._schema_rating_value_property_uri, Literal(value, datatype=XSD.float))) organization = self._uri_generator.organization_uri(row) self._graph.add( (original_rating, self._schema_author_property_uri, organization)) normalized_rating_enum = ratings.normalize( _row_string_value(row, "claimReview_author_name").lower(), _row_string_value(row, "rating_alternateName").lower()) claim.normalized_rating = normalized_rating_enum.name normalized_rating = self._uri_generator.create_normalized_rating_uri( normalized_rating_enum) self._graph.add( (normalized_rating, RDF.type, self._schema_rating_class_uri)) self._graph.add( (normalized_rating, self._schema_alternate_name_property_uri, Literal(str(normalized_rating_enum.name), lang=self._iso1_language_tag))) self._graph.add( (normalized_rating, self._schema_rating_value_property_uri, Literal(normalized_rating_enum.value, datatype=XSD.integer))) claimskg_org = self._uri_generator.claimskg_organization_uri() self._graph.add((normalized_rating, self._schema_author_property_uri, claimskg_org)) return original_rating, normalized_rating def _create_mention(self, mention_entry, claim: ClaimLogicalView, in_review): rho_value = float(mention_entry['score']) if rho_value > self._threshold: text = mention_entry['text'] start = mention_entry['begin'] end = mention_entry['end'] entity_uri = mention_entry['entity'].replace(" ", "_") categories = mention_entry['categories'] if len(categories) > 0: categories = categories[0].split(",") mention = self._uri_generator.mention_uri( start, end, text, entity_uri, rho_value, ",".join(claim.text_fragments)) self._graph.add((mention, RDF.type, self._nif_context_class_uri)) self._graph.add( (mention, RDF.type, self._nif_RFC5147String_class_uri)) self._graph.add((mention, self._nif_is_string_property_uri, Literal(text, lang=self._iso1_language_tag))) self._graph.add((mention, self._nif_begin_index_property_uri, Literal(int(start), datatype=XSD.integer))) self._graph.add((mention, self._nif_end_index_property_uri, Literal(int(end), datatype=XSD.integer))) # TODO: Fix values so that they aren't displayed in scientific notation self._graph.add( (mention, self.its_ta_confidence_property_uri, Literal(float(self._format_confidence_score(mention_entry)), datatype=XSD.float))) self._graph.add((mention, self.its_ta_ident_ref_property_uri, self._dbr_prefix[entity_uri])) if in_review: claim.review_entities.append(entity_uri) for category in categories: claim.review_entity_categories.append(category) else: claim.claim_entities.append(entity_uri) for category in categories: claim.claim_entity_categories.append(category) for category in categories: category = category.replace(" ", "_") self._graph.add((mention, URIRef(self._dct_prefix["about"]), URIRef(self._dbc_prefix[category]))) return mention, self._dbr_prefix[entity_uri] else: return None, None @staticmethod def _format_confidence_score(mention_entry): value = float(mention_entry['score']) rounded_to_two_decimals = round(value, 2) return str(rounded_to_two_decimals) def create_contact_vcard(self): atchechmedjiev_contact_vcard = URIRef( self._claimskg_prefix['atchechmedjiev_contact_vcard']) self._graph.add((atchechmedjiev_contact_vcard, RDF.type, URIRef(self._vcard_prefix['Individual']))) self._graph.add( (atchechmedjiev_contact_vcard, self._vcard_prefix['hasEmail'], URIRef("mailto:[email protected]"))) self._graph.add( (atchechmedjiev_contact_vcard, self._vcard_prefix['fn'], Literal("Andon Tchechmedjiev"))) return atchechmedjiev_contact_vcard def add_dcat_metadata(self): claimskg = rdflib.term.URIRef(self._claimskg_prefix['claimskg']) self._graph.add((claimskg, RDF.type, rdflib.term.URIRef(self._dcat_prefix['Dataset']))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dct_prefix['title']), Literal("ClaimsKG"))) self._graph.add(( claimskg, rdflib.term.URIRef(self._dct_prefix['description']), Literal("ClaimsKG: A Live Knowledge Graph ofFact-Checked Claims"))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dct_prefix['issued']), rdflib.term.Literal("2019-04-10", datatype=XSD.date))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dct_prefix['modified']), rdflib.term.Literal(datetime.datetime.now(), datatype=XSD.date))) doi_org = URIRef(self._claimskg_prefix['doi_org_instance']) self._graph.add( (doi_org, RDF.type, URIRef(self._foaf_prefix['Organization']))) self._graph.add( (doi_org, RDFS.label, Literal("International DOI Foundation"))) self._graph.add((doi_org, self._foaf_prefix['homepage'], URIRef("https://www.doi.org/"))) identifier = URIRef(self._claimskg_prefix['doi_identifier']) self._graph.add( (identifier, RDF.type, self._adms_prefix['Identifier'])) self._graph.add((identifier, self._skos_prefix['notation'], URIRef("https://doi.org/10.5281/zenodo.2628745"))) self._graph.add((identifier, self._adms_prefix['schemaAgency'], Literal("International DOI Foundation"))) self._graph.add((identifier, self._dct_prefix['creator'], doi_org)) self._graph.add( (claimskg, rdflib.term.URIRef(self._dct_prefix['identifier']), rdflib.term.Literal("10.5281/zenodo.2628745"))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dct_prefix['language']), rdflib.term.URIRef("http://id.loc.gov/vocabulary/iso639-1/en"))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dct_prefix['accrualPeriodicity']), URIRef("http://purl.org/linked-data/sdmx/2009/code#freq-M"))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']), Literal("Claims"))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']), Literal("Facts"))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']), Literal("Fact-checking"))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']), Literal("Knowledge Graphs"))) self._graph.add( (claimskg, rdflib.term.URIRef(self._dcat_prefix['contactPoint']), self.create_contact_vcard())) # SPARQL Distribution sparql_claimskg_distribution = URIRef( self._claimskg_prefix['sparql_claimskg_distribution']) self._graph.add((sparql_claimskg_distribution, RDF.type, self._dcat_prefix['Distribution'])) self._graph.add( (sparql_claimskg_distribution, self._dct_prefix['title'], Literal("SPARQL endpoint"))) self._graph.add( (sparql_claimskg_distribution, self._dct_prefix['description'], Literal("The ClaimsKG SPARQL endpoint"))) self._graph.add((sparql_claimskg_distribution, rdflib.term.URIRef(self._dct_prefix['issued']), rdflib.term.Literal("2019-04-10", datatype=XSD.date))) self._graph.add((sparql_claimskg_distribution, rdflib.term.URIRef(self._dct_prefix['modified']), rdflib.term.Literal(datetime.datetime.now(), datatype=XSD.date))) licence_document = URIRef( "https://creativecommons.org/licenses/by/4.0/") self._graph.add( (licence_document, RDF.type, self._dct_prefix['LicenseDocument'])) self._graph.add((sparql_claimskg_distribution, rdflib.term.URIRef(self._dct_prefix['license']), licence_document)) self._graph.add((sparql_claimskg_distribution, rdflib.term.URIRef(self._dcat_prefix['accessURL']), Literal("https://data.gesis.org/claimskg/sparql"))) # Source code distribution sourcecode_claimskg_distribution = URIRef( self._claimskg_prefix['sourcecode_claimskg_distribution']) self._graph.add((sourcecode_claimskg_distribution, RDF.type, self._dcat_prefix['Distribution'])) self._graph.add( (sourcecode_claimskg_distribution, self._dct_prefix['title'], Literal("SPARQL endpoint"))) self._graph.add( (sourcecode_claimskg_distribution, self._dct_prefix['description'], Literal("The ClaimsKG Github repository group"))) self._graph.add((sourcecode_claimskg_distribution, rdflib.term.URIRef(self._dct_prefix['issued']), rdflib.term.Literal("2019-04-10", datatype=XSD.date))) self._graph.add((sourcecode_claimskg_distribution, rdflib.term.URIRef(self._dct_prefix['modified']), rdflib.term.Literal(datetime.datetime.now(), datatype=XSD.date))) self._graph.add((sourcecode_claimskg_distribution, rdflib.term.URIRef(self._dct_prefix['license']), licence_document)) self._graph.add((sourcecode_claimskg_distribution, rdflib.term.URIRef(self._dcat_prefix['accessURL']), Literal("https://github.com/claimskg"))) def generate_model(self, dataset_rows): row_counter = 0 self._graph.namespace_manager = self._namespace_manager total_entry_count = len(dataset_rows) self.add_dcat_metadata() progress_bar = tqdm(total=total_entry_count) for row in dataset_rows: row_counter += 1 progress_bar.update(1) logical_claim = ClaimLogicalView( ) # Instance holding claim raw information for mapping generation source_site = _row_string_value(row, 'claimReview_author_name') if source_site not in self.per_source_statistics.keys(): self.per_source_statistics[source_site] = ClaimsKGStatistics() claim_review_instance = self._create_schema_claim_review( row, logical_claim) organization = self._create_organization(row, logical_claim) self._graph.add((claim_review_instance, self._schema_author_property_uri, organization)) creative_work = self._create_creative_work(row, logical_claim) self._graph.add( (claim_review_instance, self._schema_item_reviewed_property_uri, creative_work)) logical_claim.creative_work_uri = creative_work original, normalized = self._create_review_rating( row, logical_claim) self._graph.add( (claim_review_instance, rdflib.term.URIRef(self._schema_prefix['reviewRating']), original)) self._graph.add( (claim_review_instance, rdflib.term.URIRef(self._schema_prefix['reviewRating']), normalized)) # For claim review mentions entities_json = row[ 'extra_entities_claimReview_claimReviewed'] # type: str loaded_json = self._process_json(entities_json) if loaded_json: for mention_entry in loaded_json: mention, dbpedia_entity = self._create_mention( mention_entry, logical_claim, True) if mention: self._graph.add( (creative_work, self._schema_mentions_property_uri, mention)) # For Creative Work mentions body_entities_json = row['extra_entities_body'] loaded_body_json = self._process_json(body_entities_json) if loaded_body_json: for mention_entry in loaded_body_json: mention, dbpedia_entity = self._create_mention( mention_entry, logical_claim, False) if mention: self._graph.add( (claim_review_instance, self._schema_mentions_property_uri, mention)) self._logical_view_claims.append(logical_claim) self.global_statistics.compute_stats_for_review(logical_claim) self.per_source_statistics[source_site].compute_stats_for_review( logical_claim) progress_bar.close() def _process_json(self, json_string): loaded_json = [] if json_string: json_string = re.sub("\",\"\"", ",\"", json_string) json_string = re.sub('"\n\t\"', "", json_string) json_string = re.sub('}\]\[\]', '}]', json_string) if json_string == "[[][]]": loaded_json = [] else: try: loaded_json = json.loads(json_string) except ValueError: loaded_json = None return loaded_json def export_rdf(self, format): print("\nGlobal dataset statistics") self.global_statistics.output_stats() print("\nPer source site statistics") for site in self.per_source_statistics.keys(): print("\n\n{site} statistics...".format(site=site)) self.per_source_statistics[site].output_stats() graph_serialization = self._graph.serialize(format=format, encoding='utf-8') return graph_serialization def reconcile_claims(self, embeddings, theta, keyword_weight, link_weight, text_weight, entity_weight, mappings_file_path=None, seed=None, samples=None): reconciler = FactReconciler(embeddings, self._use_caching, mappings_file_path, self._logical_view_claims, theta, keyword_weight, link_weight, text_weight, entity_weight, seed=seed, samples=samples) mappings = reconciler.generate_mappings() for mapping in mappings: if mapping is not None and mapping[ 1] is not None and mapping[1] != (None, None): source = mapping[1][0] target = mapping[1][1] self._graph.add((source.creative_work_uri, OWL.sameAs, target.creative_work_uri)) def materialize_indirect_claim_links(self): mdg = rdflib_to_networkx_multidigraph(self._graph) def align_duplicated(self): count = len(self._logical_view_claims) total = int(count * (count - 1) / 2) result = [ pair for pair in tqdm(itertools.combinations(range(count), 2), total=total) if self.compare_claim_titles(self._logical_view_claims[pair[0]], self._logical_view_claims[pair[1]]) ] for pair in result: self._graph.add( (self._creative_works_index[pair[0]], self._owl_same_as, self._creative_works_index[pair[1]])) self.global_statistics.count_mapping() self.per_source_statistics[self._logical_view_claims[ pair[0]].claimreview_author].count_mapping() def compare_claim_titles(self, claim_a, claim_b): return self._normalize_label(claim_a.title) == self._normalize_label( claim_b.title) def _normalize_label(self, label): return label.strip().lower().replace("\"", "").replace("'", "")
def __init__(self): self.base = None self.namespace_manager = NamespaceManager( Graph()) # ns man needs a store
def build_graph_and_post(reginfo_obj, regitems_obj, user=None, passwd=None, mode='single', emitFile=False, registry_auth_url=None, updateOnlineRegisters=False, verbose=False): if reginfo_obj == False or regitems_obj == False: return False ns_prefix_lookup = { "description": 'dct', "source": 'dct', "definition": 'skos', "broader": 'skos', "notation": 'reg', "note": 'skos', "altLabel": 'skos', "hiddenLabel": 'skos', "exactMatch": 'skos', "label": 'rdfs', } prefixes_g = rdflib.Graph() if verbose: print("Prefix file...") print(__file__) PREFIX_FILE = pkg_resources.resource_filename("ldrpyutils", 'data/prefixes.ttl') #if(pkg_resources.resource_exists("ldrpyutils", 'data/prefixes.ttl')): # if verbose: # print("Prefix file exists") # print(pkg_resources.resource_string("ldrpyutils", 'data/prefixes.ttl')) #else: # if verbose: # print("Prefix file does not exist!") if verbose: print(PREFIX_FILE) with open(PREFIX_FILE) as f: #read_data = f.read() prefixes_g.parse(f, format="ttl") nsMgr = NamespaceManager(prefixes_g) all_ns = [n for n in nsMgr.namespaces()] prefix_idx = {} for prefix, namespace in all_ns: #print (prefix, namespace.n3()) prefix_idx[prefix] = Namespace(namespace) g = None status = { "didEmitFile": False, "didUpdateOnlineRegisters": False, "isSuccessful": False, } if mode == 'single': register_id = reginfo_obj['id'] register_url = reginfo_obj['registry_location'] reglabel = reginfo_obj['label'] regdescription = reginfo_obj['description'] register_url = reginfo_obj['registry_location'] (parent_reg_url, sub_reg_id ) = get_register_location_parent_and_subreg_url(register_url) subreg_graph = get_subregister_graph(sub_reg_id, reglabel, regdescription, prefix_idx, nsMgr) subreg_data = subreg_graph.serialize(None, format='turtle') if verbose: print("Outputting register graph for " + sub_reg_id) print(subreg_data) g = get_register_graph(sub_reg_id, reginfo_obj, regitems_obj[sub_reg_id], nsMgr, prefix_idx, ns_prefix_lookup) data = g.serialize(None, format='turtle') if verbose: print("Outputting graph for " + sub_reg_id) print(data) if emitFile or updateOnlineRegisters: filename = sub_reg_id + ".ttl" g.serialize(filename, format="turtle") status['didEmitFile'] = True if updateOnlineRegisters: # use the file to update the registers resFlag = post_update_to_online_register( sub_reg_id, parent_reg_url, register_url, data, subreg_data, registry_auth_url=registry_auth_url, user=user, passwd=passwd, verbose=verbose) status['didUpdateOnlineRegisters'] = resFlag if resFlag == False: status['isSuccessful'] = False else: status['isSuccessful'] = True else: #assume multi register for key in reginfo_obj: register_id = key register_url = reginfo_obj[key]['registry_location'] reglabel = reginfo_obj[key]['label'] regdescription = reginfo_obj[key]['description'] register_url = reginfo_obj[key]['registry_location'] (parent_reg_url, sub_reg_id ) = get_register_location_parent_and_subreg_url(register_url) subreg_graph = get_subregister_graph(sub_reg_id, reglabel, regdescription, prefix_idx, nsMgr) subreg_data = subreg_graph.serialize(None, format='turtle') g = get_register_graph(sub_reg_id, reginfo_obj[key], regitems_obj[key], nsMgr, prefix_idx, ns_prefix_lookup) data = g.serialize(format='turtle') status['didEmitFile'] = True if verbose: print(data) if emitFile: filename = sub_reg_id + ".ttl" g.serialize(filename, format="turtle") if updateOnlineRegisters: #use the file to update the registers resFlag = post_update_to_online_register( sub_reg_id, parent_reg_url, register_url, data, subreg_data, registry_auth_url=registry_auth_url, user=user, passwd=passwd, verbose=verbose) status['didUpdateOnlineRegisters'] = resFlag if resFlag == False: status['isSuccessful'] = False else: status['isSuccessful'] = True return (g, status)
# Extra Namespaces ADMS = Namespace('http://www.w3.org/ns/adms#') DCAT = Namespace('http://www.w3.org/ns/dcat#') HYDRA = Namespace('http://www.w3.org/ns/hydra/core#') SCHEMA = Namespace('http://schema.org/') SCV = Namespace('http://purl.org/NET/scovo#') SPDX = Namespace('http://spdx.org/rdf/terms#') VCARD = Namespace('http://www.w3.org/2006/vcard/ns#') FREQ = Namespace('http://purl.org/cld/freq/') EUFREQ = Namespace( 'http://publications.europa.eu/resource/authority/frequency/' ) # noqa: E501 DCT = DCTERMS # More common usage namespace_manager = NamespaceManager(Graph()) namespace_manager.bind('dcat', DCAT) namespace_manager.bind('dct', DCT) namespace_manager.bind('foaf', FOAF) namespace_manager.bind('foaf', FOAF) namespace_manager.bind('hydra', HYDRA) namespace_manager.bind('rdfs', RDFS) namespace_manager.bind('scv', SCV) namespace_manager.bind('skos', SKOS) namespace_manager.bind('vcard', VCARD) namespace_manager.bind('xsd', XSD) namespace_manager.bind('freq', FREQ) # Support JSON-LD in format detection FORMAT_MAP = SUFFIX_FORMAT_MAP.copy() FORMAT_MAP['json'] = 'json-ld'
core_namespaces = { 'dc' : rdflib.namespace.DC, 'dcterms' : rdflib.namespace.DCTERMS, 'ebucore' : Namespace( 'http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#'), 'fcrepo' : Namespace('http://fedora.info/definitions/fcrepo#'), 'fcadmin' : Namespace('info:fcsystem/graph/admin'), 'fcres' : Namespace('info:fcres'), 'fcmain' : Namespace('info:fcsystem/graph/userdata/_main'), 'fcstruct' : Namespace('info:fcsystem/graph/structure'), 'fcsystem' : Namespace('info:fcsystem/'), 'foaf': Namespace('http://xmlns.com/foaf/0.1/'), 'iana' : Namespace('http://www.iana.org/assignments/relation/'), 'ldp' : Namespace('http://www.w3.org/ns/ldp#'), 'pcdm': Namespace('http://pcdm.org/models#'), 'premis' : Namespace('http://www.loc.gov/premis/rdf/v1#'), 'rdf' : rdflib.namespace.RDF, 'rdfs' : rdflib.namespace.RDFS, 'webac' : Namespace('http://www.w3.org/ns/auth/acl#'), 'xsd' : rdflib.namespace.XSD, } ns_collection = {pfx: Namespace(ns) for pfx, ns in config['namespaces'].items()} ns_collection.update(core_namespaces) ns_mgr = NamespaceManager(Graph()) # Collection of prefixes in a dict. for ns,uri in ns_collection.items(): ns_mgr.bind(ns, uri, override=False)