def test_add_to_graph_not_supported(custom_bundle):
    """A loaded bundle exposes a read-only store: adding a triple must raise."""
    dep_desc = Descriptor.load('''
id: dep
includes:
    - http://example.com/ctx
''')
    test_desc = Descriptor.load('''
id: test
dependencies:
    - dep
''')
    dep_graph = ConjunctiveGraph()
    ctx = dep_graph.get_context('http://example.com/ctx')
    dep_graph.add((URIRef('http://example.org/sub'),
                   URIRef('http://example.org/prop'),
                   URIRef('http://example.org/obj'),
                   ctx))
    with custom_bundle(dep_desc, graph=dep_graph) as depbun, \
            custom_bundle(test_desc, bundles_directory=depbun.bundles_directory) as testbun, \
            Bundle('test', bundles_directory=testbun.bundles_directory) as bnd:
        # any write inside a transaction should hit the read-only store
        with pytest.raises(ZODB.POSException.ReadOnlyError):
            with transaction.manager:
                bnd.rdf.add((URIRef('http://example.org/sub'),
                             URIRef('http://example.org/prop'),
                             URIRef('http://example.org/obj')))
def test_transitive_dep_null_context_triples_no_imports(custom_bundle):
    """Triples from a transitive dependency are visible in the dependent bundle."""
    dep_dep_desc = Descriptor.load('''
id: dep_dep
includes:
    - http://example.com/ctx
''')
    dep_desc = Descriptor.load('''
id: dep
dependencies:
    - dep_dep
''')
    test_desc = Descriptor.load('''
id: test
dependencies:
    - dep
''')
    source_graph = ConjunctiveGraph()
    target_ctx = source_graph.get_context('http://example.com/ctx')
    triple = (URIRef('http://example.org/sub'),
              URIRef('http://example.org/prop'),
              URIRef('http://example.org/obj'))
    source_graph.add(triple + (target_ctx,))
    with custom_bundle(dep_dep_desc, graph=source_graph) as depdepbun, \
            custom_bundle(dep_desc, bundles_directory=depdepbun.bundles_directory) as depbun, \
            custom_bundle(test_desc, bundles_directory=depbun.bundles_directory) as testbun, \
            Bundle('test', bundles_directory=testbun.bundles_directory) as bnd:
        # the dep-of-dep triple must surface through two levels of dependency
        assert {triple} == set(bnd.rdf.triples((None, None, None)))
def convert_gml(self, ttl_output_file, uri_part, specific_part):
    """
    Pelagios conversion GML to TTL

    @type ttl_output_file: string
    @param ttl_output_file: Absolute path to TTL output file
    @type uri_part: string
    @param uri_part: URI for the region to be displayed (e.g. http://earkdev.ait.ac.at/earkweb/sip2aip/working_area/sip2aip/34809536-b9f8-4c51-83d1-ef365ca658f5/)
    @type specific_part: string
    @param specific_part: Specific part that distinguishes the URI from other URIs (e.g. 1994)
    """
    # vocabulary namespaces for the output graph (several are bound but not
    # used for statements in this method; kept for downstream consumers)
    cito_ns = Namespace("http://purl.org/spar/cito")
    cnt_ns = Namespace("http://www.w3.org/2011/content#")
    dcterms_ns = Namespace("http://purl.org/dc/terms/")
    foaf_ns = Namespace("http://xmlns.com/foaf/0.1/")
    geo_ns = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
    geosparql_ns = Namespace("http://www.opengis.net/ont/geosparql#")
    gn_ns = Namespace("http://www.geonames.org/ontology#")
    lawd_ns = Namespace("http://lawd.info/ontology/")
    rdfs_ns = Namespace("http://www.w3.org/2000/01/rdf-schema#")
    skos_ns = Namespace("http://www.w3.org/2004/02/skos/core#")
    slovenia = URIRef("http://earkdev.ait.ac.at/earkweb/sip2aip/working_area/sip2aip/5c6f5563-7665-4719-a2b6-4356ea033c1d/#place/Slovenia")
    store = IOMemory()
    g = ConjunctiveGraph(store=store)
    g.bind("cito", cito_ns)
    g.bind("cnt", cnt_ns)
    g.bind("dcterms", dcterms_ns)
    g.bind("foaf", foaf_ns)
    g.bind("geo", geo_ns)
    g.bind("geosparql", geosparql_ns)
    g.bind("gn", gn_ns)
    g.bind("lawd", lawd_ns)
    g.bind("rdfs", rdfs_ns)
    g.bind("skos", skos_ns)
    # named graph (identified by the Slovenia place URI) holding the districts
    graph_slovenian_districts = Graph(store=store, identifier=slovenia)
    gml_to_wkt = GMLtoWKT(self.gml_file)
    district_included = {}  # techname -> True; guards against duplicate districts
    i = 1
    print "Processing GML file: %s" % self.gml_file
    for district_wkt in gml_to_wkt.get_wkt_linear_ring():
        techname = whsp_to_unsc(district_wkt["name"])
        print "District %d: %s" % (i, whsp_to_unsc(district_wkt["name"]))
        if techname not in district_included:
            district = URIRef("%s#place/%s/%s" % (uri_part, whsp_to_unsc(district_wkt["name"]), specific_part))
            graph_slovenian_districts.add((district, RDF.type, lawd_ns.Place))
            graph_slovenian_districts.add((district, dcterms_ns['isPartOf'], slovenia))
            graph_slovenian_districts.add((district, dcterms_ns['temporal'], Literal(str(district_wkt["year"]))))
            graph_slovenian_districts.add((district, gn_ns['countryCode'], Literal(u'SI')))
            graph_slovenian_districts.add((district, rdfs_ns['label'], Literal(district_wkt["name"], lang=u'si')))
            # geometry hangs off a blank node; the WKT literal goes into the
            # default graph (g) rather than the districts graph
            polygons = BNode()
            graph_slovenian_districts.add((district, geosparql_ns['hasGeometry'], polygons))
            g.add((polygons, geosparql_ns['asWKT'], Literal(district_wkt["polygon"])))
            district_included[techname] = True
        i += 1
    with open(ttl_output_file, 'w') as f:
        f.write(g.serialize(format='n3'))
        f.close()  # NOTE(review): redundant inside `with`; kept as-is
def test_triples_choices_context_not_included(custom_bundle):
    """triples_choices over a context the bundle doesn't include yields nothing.

    The descriptor deliberately includes ``ctxg`` while the data lives in
    ``ctx``, so the queried context is absent from the bundle.
    """
    dep_desc = Descriptor.load('''
id: dep
includes:
    - http://example.com/ctxg
''')
    test_desc = Descriptor.load('''
id: test
dependencies:
    - dep
''')
    source_graph = ConjunctiveGraph()
    ctx = source_graph.get_context('http://example.com/ctx')
    source_graph.add((URIRef('http://example.org/sub'),
                      URIRef('http://example.org/prop'),
                      URIRef('http://example.org/obj'),
                      ctx))
    with custom_bundle(dep_desc, graph=source_graph) as depbun, \
            custom_bundle(test_desc, bundles_directory=depbun.bundles_directory) as testbun, \
            Bundle('test', bundles_directory=testbun.bundles_directory) as bnd:
        hits = list(bnd.rdf.triples_choices(
            (URIRef('http://example.org/sub'),
             URIRef('http://example.org/prop'),
             [URIRef('http://example.org/obj')]),
            context=ctx))
        assert not hits
def main(fd, store_type=None, store_id=None, graph_id=None, gzipped=False): """ Converts MARC21 data stored in fd to a RDFlib graph. """ from rdflib import plugin if store_type: msg = "Need a {} identifier for a disk-based store." assert store_id, msg.format('store') assert graph_id, msg.format('graph') store = plugin.get(store_type, Store)(store_id) else: store = 'default' graph = Graph(store=store, identifier=graph_id) try: records = MARCReader(open(fd)) for i, triple in enumerate(process_records(records)): graph.add(triple) if i % 100 == 0: graph.commit() if i % 10000 == 0: print i finally: graph.commit() return graph
def make_graph():
    """Assemble the InPhO export graph: bind prefixes, assert OWL disjointness
    between the top-level entity types, then populate content."""
    g = Graph()
    # namespace prefixes used throughout the export
    prefixes = [
        ("inpho", "http://inpho.cogs.indiana.edu/"),
        ("thinker", "http://inpho.cogs.indiana.edu/thinker/"),
        ("journal", "http://inpho.cogs.indiana.edu/journal/"),
        ("foaf", "http://xmlns.com/foaf/0.1/"),
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("rdfs", "http://www.w3.org/TR/rdf-schema/#"),
        ("owl", "http://www.w3.org/2002/07/owl#"),
        ("idea", "http://inpho.cogs.indiana.edu/idea/"),
        ("skos", "http://www.w3.org/2004/02/skos/core#"),
        ("db", "http://dbpedia.org/"),
        ("dc", "http://purl.org/dc/elements/1.1/"),
        # user namespace currently doesn't exist?
        ("user", "http://inpho.cogs.indiana.edu/user/"),
    ]
    for prefix, uri in prefixes:
        g.bind(prefix, uri)

    # OWL disjoints: each pair of top-level entity types is pairwise disjoint
    disjoint_objects = ["thinker", "journal", "idea", "user"]
    for a, b in combinations(disjoint_objects, 2):
        g.add((inpho[a], owl['disjointWith'], inpho[b]))

    g = populate_thinkers(g)
    g = populate_ideas(g)
    g = populate_journals(g)
    return g
def output_to_oac(fileid, dir, metadata, annotations):
    """Write the citation annotations for one JSTOR article as OAC Turtle.

    @param fileid: source text file name; ".txt" is swapped for ".ttl"
    @param dir: output directory (expected to end with a path separator,
        since it is joined by plain concatenation)
    @param metadata: dict with at least "citations" (list of dicts with
        "label" and "ctsurn" keys) and "doi"
    @param annotations: unused here -- TODO confirm whether it was meant
        to be consumed
    """
    # import libraries
    from rdflib import Namespace, BNode, Literal, URIRef, RDF, RDFS
    from rdflib.graph import Graph, ConjunctiveGraph
    from rdflib.plugins.memory import IOMemory

    # declare namespaces
    oac = Namespace("http://www.w3.org/ns/oa#")
    perseus = Namespace("http://data.perseus.org/citations/")
    myanno = Namespace("http://hellespont.org/annotations/jstor")
    store = IOMemory()
    # initialise the graph
    g = ConjunctiveGraph(store=store)
    # bind namespaces
    g.bind("oac", oac)
    g.bind("perseus", perseus)
    g.bind("myanno", myanno)
    # one oa:Annotation per citation, targeting the JSTOR article
    for n, ann in enumerate(metadata["citations"]):
        anno1 = URIRef(myanno["#%i" % n])
        g.add((anno1, RDF.type, oac["Annotation"]))
        g.add((anno1, oac["hasTarget"], URIRef("%s%s" % ("http://jstor.org/stable/", metadata["doi"]))))
        g.add((anno1, RDFS.label, Literal(ann["label"])))
        g.add((anno1, oac["hasBody"], perseus[ann["ctsurn"]]))
        g.add((anno1, oac["motivatedBy"], oac["linking"]))
    fname = "%s%s" % (dir, fileid.replace(".txt", ".ttl"))
    # FIX: use a context manager so the file is closed even if serialize fails
    # (previously an unguarded open/close pair)
    with open(fname, "w") as f:
        f.write(g.serialize(format="turtle"))
    return
def writeFile(self, stmts, ctx, fileWords):
    """Serialize ``stmts`` into the named context ``ctx`` and write them
    to a per-post file under commentstore/.

    @param stmts: iterable of (s, p, o) triples
    @param ctx: context (named graph) identifier for the statements
    @param fileWords: words joined with '-' to build the file name
    """
    outfile = "commentstore/post-%s.nt" % ("-".join(fileWords))
    graph = ConjunctiveGraph()
    # BUG FIX: the old code called graph.add(*stmts, **{'context': ctx}),
    # which unpacks the statement list into add()'s arguments (wrong API),
    # and then serialized via a nonexistent `graph.graph` attribute.
    # Add each statement to the requested context and serialize the graph.
    ctx_graph = graph.get_context(ctx)
    for stmt in stmts:
        ctx_graph.add(stmt)
    # NOTE(review): format 'n3' with a '.nt' file extension -- confirm which
    # serialization downstream readers expect.
    graph.serialize(outfile, format='n3')
    log.info("wrote new comment to %s", outfile)
def test_pretty_xmlliteral(self):
    """Well-formed XMLLiteral content is emitted inline as parseType="Literal"."""
    graph = ConjunctiveGraph()
    markup = Literal(
        u'''<p xmlns="http://www.w3.org/1999/xhtml">See also <a href="#aring">Å</a></p>''',
        datatype=RDF.XMLLiteral)
    graph.add((BNode(), RDF.value, markup))
    output = graph.serialize(format='pretty-xml')
    expected = u'''<rdf:value rdf:parseType="Literal"><p xmlns="http://www.w3.org/1999/xhtml">See also <a href="#aring">Å</a></p></rdf:value>'''
    assert expected.encode('utf-8') in output
def test_pretty_broken_xmlliteral(self):
    """Malformed XMLLiteral content falls back to an escaped rdf:datatype form."""
    graph = ConjunctiveGraph()
    broken = Literal(u'''<p ''', datatype=RDF.XMLLiteral)
    graph.add((BNode(), RDF.value, broken))
    output = graph.serialize(format='pretty-xml')
    expected_prefix = u'''<rdf:value rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"><p '''
    assert expected_prefix.encode('utf-8') in output
def _mangled_copy(g):
    "Makes a copy of the graph, replacing all bnodes with the bnode ``_blank``."
    def _scrub(term):
        # collapse every blank node onto the shared placeholder
        return _blank if isinstance(term, BNode) else term

    gcopy = ConjunctiveGraph()
    for triple in g:
        gcopy.add(tuple(_scrub(term) for term in triple))
    return gcopy
def test_pretty_broken_xmlliteral(self):
    # broken XML in an XMLLiteral cannot be inlined; the serializer must
    # escape it and keep the explicit datatype attribute
    g = ConjunctiveGraph()
    g.add((BNode(), RDF.value, Literal("""<p """, datatype=RDF.XMLLiteral)))
    serialized = g.serialize(format="pretty-xml")
    wanted = """<rdf:value rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"><p """
    assert wanted.encode("utf-8") in serialized
def writeFile(self, stmts, ctx, fileWords):
    """Store a comment's statements in mongo as N3 plus extracted metadata.

    NOTE(review): ``fileWords`` is accepted but unused here -- confirm
    whether callers still need it.
    """
    graph = ConjunctiveGraph()
    doc = {'ctx': ctx}
    for stmt in stmts:
        graph.add(stmt)
        subj, pred, obj = stmt
        if pred == SIOC.has_reply:
            doc['topic'] = subj
        if pred == DCTERMS.created:
            # expecting 2 of these, but same value
            doc['created'] = parse(obj)
    doc['n3'] = graph.serialize(format="n3")
    self.mongo['comment'].insert(doc, safe=True)
def test_escaping_of_triple_doublequotes():
    """
    Issue 186 - Check escaping of multiple doublequotes.
    A Literal containing both newlines and runs of doublequotes must survive
    an N3 serialize/parse roundtrip; a broken serializer emits invalid N3 here.
    """
    original = ConjunctiveGraph()
    original.add((URIRef('http://foobar'),
                  URIRef('http://fooprop'),
                  Literal('abc\ndef"""""')))
    roundtripped = ConjunctiveGraph()
    roundtripped.parse(data=original.serialize(format='n3'), format='n3')
    assert original.isomorphic(roundtripped) is True
def test_escaping_of_triple_doublequotes():
    """
    Issue 186 - Check escaping of multiple doublequotes.
    Roundtrip a Literal that mixes a newline with five consecutive quotation
    marks through the N3 serializer and parser; the graphs must match.
    """
    subject, prop = URIRef('http://foobar'), URIRef('http://fooprop')
    tricky_value = Literal('abc\ndef"""""')
    source = ConjunctiveGraph()
    source.add((subject, prop, tricky_value))
    parsed_back = ConjunctiveGraph()
    parsed_back.parse(data=source.serialize(format='n3'), format='n3')
    assert source.isomorphic(parsed_back) is True
def infer(graph: ConjunctiveGraph, rules: ConjunctiveGraph):
    """
    returns new graph of inferred statements.

    Repeatedly applies every log:implies rule whose antecedent statements are
    all present in the working set, until a full pass adds nothing new.
    """
    log.info(
        f'Begin inference of graph len={len(graph)} with rules len={len(rules)}:'
    )
    workingSet = ConjunctiveGraph()
    workingSet.addN(graph.quads())
    implied = ConjunctiveGraph()
    delta = 1
    while delta > 0:
        # measure growth of `implied` across one full pass over the rules
        delta = -len(implied)
        for r in rules:
            if r[1] == LOG['implies']:
                containsSetup = all(st in workingSet for st in r[0])
                if containsSetup:
                    log.info(f' Rule {r[0]} -> present={containsSetup}')
                    for st in r[0]:
                        log.info(
                            f' {st[0].n3()} {st[1].n3()} {st[2].n3()}')
                    log.info(f' ...implies {len(r[2])} statements')
                if containsSetup:
                    for st in r[2]:
                        workingSet.add(st)
                        implied.add(st)
            else:
                log.info(f' {r}')
        delta += len(implied)
        log.info(f' this inference round added {delta} more implied stmts')
    log.info(f'{len(implied)} stmts implied:')
    for st in implied:
        log.info(f' {st}')
    return implied
    # FIX: removed an unreachable fuxi/ReteNetwork-based variant ("based on
    # fuxi/tools/rdfpipe.py") that followed this return and could never run.
def write_graph(data_handle, out_handle, format='n3'):
    """Convert records read from data_handle into RDF and write the
    serialized graph to out_handle; returns the record count."""
    graph = Graph()
    count = 0
    for record in generate_records(data_handle):
        count += 1
        # progress on stderr: a dot per record, the running total every 1000th
        sys.stderr.write("." if count % 1000 else str(count))
        for triple in get_triples(record):
            graph.add(triple)
        graph.commit()
    current_site = Site.objects.get_current()
    base_uri = 'https://%s' % current_site.domain
    out_handle.write(graph.serialize(format=format, base=base_uri,
                                     include_base=True))
    return count
def test_pretty_xmlliteral(self):
    # XHTML markup stored as an XMLLiteral should appear verbatim in the
    # pretty-xml output under parseType="Literal"
    markup = u"""<p xmlns="http://www.w3.org/1999/xhtml">See also <a href="#aring">Å</a></p>"""
    g = ConjunctiveGraph()
    g.add((BNode(), RDF.value, Literal(markup, datatype=RDF.XMLLiteral)))
    out = g.serialize(format="pretty-xml")
    wanted = u"""<rdf:value rdf:parseType="Literal">""" + markup + u"""</rdf:value>"""
    assert wanted.encode("utf-8") in out
def post(self): query = self.request.get("content") nrOfResults = self.request.get("amount") try: number = int(nrOfResults) except ValueError: number = 0 literals = re.findall(r'"(.+?)"',query) urls = processLiterals(literals, number) graph = ConjunctiveGraph() for url in urls: # Original URL fetch xmlresult = urlfetch.fetch(url,deadline=60,method=urlfetch.GET) if xmlresult.status_code == 200: iwa = Namespace('http://iwa2012-18-2.appspot.com/#') idns = Namespace('http://iwa2012-18-2.appspot.com/id/#') venuens = Namespace('http://iwa2012-18-2.appspot.com/venueid/#') tree = etree.fromstring(xmlresult.content) for event in tree.findall('events/event'): id = event.attrib['id'] title = event.find('title') url = event.find('url') venueid = event.find('venue_id') venueurl = event.find('venue_url') venuename = event.find('venue_name') graph.add((idns[id], iwa['hasTitle'], Literal(title.text))) graph.add((idns[id], iwa['hasUrl'], Literal(url.text))) graph.add((venuens[id], iwa['hasVenueName'], Literal(venuename.text))) graph.add((venuens[id], iwa['hasUrl'], Literal(venueurl.text))) graph.add((idns[id], iwa['atVenue'], venuens[id]))) else: print "Something went wrong with the connection to the Eventful server. Status code: " + xml.status_code print graph.serialize()
def rdf_description(name, notation='xml' ): """ Funtion takes title of node, and rdf notation. """ valid_formats = ["xml", "n3", "ntriples", "trix"] default_graph_uri = "http://gstudio.gnowledge.org/rdfstore" configString = "/var/tmp/rdfstore" # Get the Sleepycat plugin. store = plugin.get('Sleepycat', Store)('rdfstore') # Open previously created store, or create it if it doesn't exist yet graph = Graph(store="Sleepycat", identifier = URIRef(default_graph_uri)) path = mkdtemp() rt = graph.open(path, create=False) if rt == NO_STORE: #There is no underlying Sleepycat infrastructure, create it graph.open(path, create=True) else: assert rt == VALID_STORE, "The underlying store is corrupt" # Now we'll add some triples to the graph & commit the changes rdflib = Namespace('http://sbox.gnowledge.org/gstudio/') graph.bind("gstudio", "http://gnowledge.org/") exclusion_fields = ["id", "rght", "node_ptr_id", "image", "lft", "_state", "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"] node=Objecttype.objects.get(title=name) node_dict=node.__dict__ subject=str(node_dict['id']) for key in node_dict: if key not in exclusion_fields: predicate=str(key) pobject=str(node_dict[predicate]) graph.add((rdflib[subject], rdflib[predicate], Literal(pobject))) graph.commit() print graph.serialize(format=notation) graph.close()
def rdf_description(name, notation='xml'): """ Funtion takes title of node, and rdf notation. """ valid_formats = ["xml", "n3", "ntriples", "trix"] default_graph_uri = "http://gstudio.gnowledge.org/rdfstore" configString = "/var/tmp/rdfstore" # Get the Sleepycat plugin. store = plugin.get('Sleepycat', Store)('rdfstore') # Open previously created store, or create it if it doesn't exist yet graph = Graph(store="Sleepycat", identifier=URIRef(default_graph_uri)) path = mkdtemp() rt = graph.open(path, create=False) if rt == NO_STORE: #There is no underlying Sleepycat infrastructure, create it graph.open(path, create=True) else: assert rt == VALID_STORE, "The underlying store is corrupt" # Now we'll add some triples to the graph & commit the changes rdflib = Namespace('http://sbox.gnowledge.org/gstudio/') graph.bind("gstudio", "http://gnowledge.org/") exclusion_fields = [ "id", "rght", "node_ptr_id", "image", "lft", "_state", "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields" ] node = Objecttype.objects.get(title=name) node_dict = node.__dict__ subject = str(node_dict['id']) for key in node_dict: if key not in exclusion_fields: predicate = str(key) pobject = str(node_dict[predicate]) graph.add((rdflib[subject], rdflib[predicate], Literal(pobject))) graph.commit() print graph.serialize(format=notation) graph.close()
def get(self):
    """Serve every known media type as RDF/XML."""
    ns = Namespace('http://purl.org/NET/mediatype#')
    g = ConjunctiveGraph()
    for mt in models.MediaType.all():
        uri = URIRef(mt.uri)
        g.add((uri, RDF.type, ns['MediaType']))
        g.add((uri, RDFS.label, Literal(mt.name)))
        # optional cross-references: only emitted when present
        for link in (mt.rfc_url, mt.application_url):
            if link:
                g.add((uri, RDFS.seeAlso, URIRef(link)))
    self.response.headers['Content-Type'] = 'application/rdf+xml'
    g.serialize(self.response.out)
def testSerialize(self):
    """Round-trip two named graphs plus one default-graph triple through the
    TriX serializer and verify each quad lands in the matching context."""
    s1 = URIRef('store:1')
    r1 = URIRef('resource:1')
    r2 = URIRef('resource:2')
    label = URIRef('predicate:label')
    g1 = Graph(identifier = s1)
    g1.add((r1, label, Literal("label 1", lang="en")))
    g1.add((r1, label, Literal("label 2")))
    s2 = URIRef('store:2')
    g2 = Graph(identifier = s2)
    g2.add((r2, label, Literal("label 3")))
    g = ConjunctiveGraph()
    # copy each named graph's triples into the conjunctive graph as quads
    for s,p,o in g1.triples((None, None, None)):
        g.addN([(s,p,o,g1)])
    for s,p,o in g2.triples((None, None, None)):
        g.addN([(s,p,o,g2)])
    r3 = URIRef('resource:3')
    # one triple in the default (anonymous) context
    g.add((r3, label, Literal(4)))
    r = g.serialize(format='trix')
    g3 = ConjunctiveGraph()
    from StringIO import StringIO  # Python 2 StringIO
    g3.parse(StringIO(r), format='trix')
    for q in g3.quads((None,None,None)):
        # TODO: Fix once getGraph/getContext is in conjunctive graph
        if isinstance(q[3].identifier, URIRef):
            tg=Graph(store=g.store, identifier=q[3].identifier)
        else:
            # BNode, this is a bit ugly
            # we cannot match the bnode to the right graph automagically
            # here I know there is only one anonymous graph,
            # and that is the default one, but this is not always the case
            tg=g.default_context
        self.assertTrue(q[0:3] in tg)
def testSerialize(self):
    """TriX round-trip: quads serialized from two named graphs (plus the
    default graph) must reappear in the matching graphs after re-parsing."""
    label = URIRef('predicate:label')
    store1 = URIRef('store:1')
    store2 = URIRef('store:2')
    res1 = URIRef('resource:1')
    res2 = URIRef('resource:2')
    res3 = URIRef('resource:3')

    g1 = Graph(identifier=store1)
    g1.add((res1, label, Literal("label 1", lang="en")))
    g1.add((res1, label, Literal("label 2")))
    g2 = Graph(identifier=store2)
    g2.add((res2, label, Literal("label 3")))

    combined = ConjunctiveGraph()
    for s, p, o in g1.triples((None, None, None)):
        combined.addN([(s, p, o, g1)])
    for s, p, o in g2.triples((None, None, None)):
        combined.addN([(s, p, o, g2)])
    # one triple in the default (anonymous) context
    combined.add((res3, label, Literal(4)))

    serialized = combined.serialize(format='trix')
    reparsed = ConjunctiveGraph()
    from StringIO import StringIO
    reparsed.parse(StringIO(serialized), format='trix')

    for quad in reparsed.quads((None, None, None)):
        # TODO: Fix once getGraph/getContext is in conjunctive graph
        if isinstance(quad[3].identifier, URIRef):
            target = Graph(store=combined.store, identifier=quad[3].identifier)
        else:
            # BNode context: it cannot be matched to its source graph
            # automatically; the only anonymous graph here is the default
            # one, but that is not always the case
            target = combined.default_context
        self.assertTrue(quad[0:3] in target)
def track(self, resource): graph = ConjunctiveGraph() sparql = SPARQLWrapper(self.conf.get_SPARQL()) queue = [resource] while len(queue) != 0: target = queue.pop() query = DESCRIBE_QUERY.replace('__RESOURCE__', target.n3()) query = query.replace('__RELEASE__', self.conf.get_graph_name('release')) query = query.replace('__RULES__', self.conf.get_graph_name('rules')) query = query.replace('__RAW_DATA__', self.conf.get_graph_name('raw-data')) sparql.setQuery(query) results = sparql.query().convert() for statement in results: # Add the statement to the graph graph.add(statement) # If the relate to another resource we describe, queue it (_,p,o) = statement if p.startswith(PROV): if o.startswith(self.conf.get_namespace('data')): queue.append(o) print graph.serialize(format='turtle')
def test_quad_not_in_dependency(custom_bundle):
    """A quad in a context the dependency does not include stays invisible."""
    dep_desc = Descriptor.load('''
id: dep
includes:
    - http://example.com/ctx
''')
    test_desc = Descriptor.load('''
id: test
dependencies:
    - dep
''')
    source_graph = ConjunctiveGraph()
    # data goes into other_ctx, which the descriptor does NOT include
    other_ctx = source_graph.get_context('http://example.com/other_ctx')
    quad = (URIRef('http://example.org/sub'),
            URIRef('http://example.org/prop'),
            URIRef('http://example.org/obj'),
            other_ctx)
    source_graph.add(quad)
    with custom_bundle(dep_desc, graph=source_graph) as depbun, \
            custom_bundle(test_desc, bundles_directory=depbun.bundles_directory) as testbun, \
            Bundle('test', bundles_directory=testbun.bundles_directory) as bnd:
        assert quad not in bnd.rdf
class ContextTestCase(unittest.TestCase):
    """Exercises named-graph (context) behaviour of a ConjunctiveGraph store:
    per-context add/remove/len, cross-context triples, and context listing."""
    #store = 'Memory'
    store = 'default'
    slow = True
    tmppath = None  # on-disk store directory created in setUp

    def setUp(self):
        # fresh conjunctive graph on the configured backend for every test
        self.graph = ConjunctiveGraph(store=self.store)
        if self.store == "MySQL":
            # server-side store: destroy previous state instead of a tmpdir
            from mysql import configString
            from rdflib.store.MySQL import MySQL
            path = configString
            MySQL().destroy(path)
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        # fixture terms shared by all tests
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')
        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')
        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        self.graph.close()
        shutil.rmtree(self.tmppath)

    def get_context(self, identifier):
        # context identifiers must be URIRefs or BNodes, never plain strings
        assert isinstance(identifier, URIRef) or \
            isinstance(identifier, BNode), type(identifier)
        return Graph(store=self.graph.store, identifier=identifier,
                     namespace_manager=self)

    def addStuff(self):
        # seed context c1 with seven triples
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        # inverse of addStuff: drop the same seven triples from c1
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))  # gasp!

    def addStuffInMultipleContexts(self):
        # the same single triple goes into the default context, c1 and c2
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!
        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        # conjunctive length counts distinct triples across all contexts
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEquals(len(self.graph), len(graph))

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)
        for i in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEquals(len(graph), oldLen + 10)
        self.assertEquals(len(self.get_context(c1)), oldLen + 10)
        # removing the whole context drops exactly those 10 triples
        self.graph.remove_context(self.get_context(c1))
        self.assertEquals(len(self.graph), oldLen)
        self.assertEquals(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()
        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEquals(len(self.graph), oldLen + 1)
        graph = Graph(self.graph.store, self.c1)
        self.assertEquals(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)  # revenge!
        self.addStuffInMultipleContexts()
        # triple should be still in store after removing it from c1 + c2
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assert_(triple in self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assert_(triple not in self.graph)
        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assert_(triple not in self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)  # revenge!
        self.addStuffInMultipleContexts()

        def cid(c):
            return c.identifier
        self.assert_(self.c1 in map(cid, self.graph.contexts()))
        self.assert_(self.c2 in map(cid, self.graph.contexts()))
        # contexts(triple) lists only the contexts holding that triple
        contextList = map(cid, list(self.graph.contexts(triple)))
        self.assert_(self.c1 in contextList)
        self.assert_(self.c2 in contextList)

    def testRemoveContext(self):
        c1 = self.c1
        self.addStuffInMultipleContexts()
        self.assertEquals(len(Graph(self.graph.store, c1)), 1)
        self.assertEquals(len(self.get_context(c1)), 1)
        self.graph.remove_context(self.get_context(c1))
        self.assert_(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        # wildcard remove clears every context
        self.graph.remove((Any, Any, Any))
        self.assertEquals(len(self.graph), 0)

    def testTriples(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        asserte = self.assertEquals
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, c1)
        c1triples = c1graph.triples
        Any = None
        self.addStuff()

        # unbound subjects with context
        asserte(len(list(c1triples((Any, likes, pizza)))), 2)
        asserte(len(list(c1triples((Any, hates, pizza)))), 1)
        asserte(len(list(c1triples((Any, likes, cheese)))), 3)
        asserte(len(list(c1triples((Any, hates, cheese)))), 0)

        # unbound subjects without context, same results!
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)

        # unbound objects with context
        asserte(len(list(c1triples((michel, likes, Any)))), 2)
        asserte(len(list(c1triples((tarek, likes, Any)))), 2)
        asserte(len(list(c1triples((bob, hates, Any)))), 2)
        asserte(len(list(c1triples((bob, likes, Any)))), 1)

        # unbound objects without context, same results!
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)

        # unbound predicates with context
        asserte(len(list(c1triples((michel, Any, cheese)))), 1)
        asserte(len(list(c1triples((tarek, Any, cheese)))), 1)
        asserte(len(list(c1triples((bob, Any, pizza)))), 1)
        asserte(len(list(c1triples((bob, Any, michel)))), 1)

        # unbound predicates without context, same results!
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)

        # unbound subject, objects with context
        asserte(len(list(c1triples((Any, hates, Any)))), 2)
        asserte(len(list(c1triples((Any, likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)

        # unbound predicates, objects with context
        asserte(len(list(c1triples((michel, Any, Any)))), 2)
        asserte(len(list(c1triples((bob, Any, Any)))), 3)
        asserte(len(list(c1triples((tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        asserte(len(list(c1triples((Any, Any, pizza)))), 3)
        asserte(len(list(c1triples((Any, Any, cheese)))), 3)
        asserte(len(list(c1triples((Any, Any, michel)))), 1)

        # unbound subjects, predicates without context, same results!
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound with context
        asserte(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        asserte(len(list(triples((Any, Any, Any)))), 7)

        # the same queries must work on the conjunctive graph and on c1
        for c in [graph, self.get_context(c1)]:
            # unbound subjects
            asserte(set(c.subjects(likes, pizza)), set((michel, tarek)))
            asserte(set(c.subjects(hates, pizza)), set((bob, )))
            asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel]))
            asserte(set(c.subjects(hates, cheese)), set())

            # unbound objects
            asserte(set(c.objects(michel, likes)), set([cheese, pizza]))
            asserte(set(c.objects(tarek, likes)), set([cheese, pizza]))
            asserte(set(c.objects(bob, hates)), set([michel, pizza]))
            asserte(set(c.objects(bob, likes)), set([cheese]))

            # unbound predicates
            asserte(set(c.predicates(michel, cheese)), set([likes]))
            asserte(set(c.predicates(tarek, cheese)), set([likes]))
            asserte(set(c.predicates(bob, pizza)), set([hates]))
            asserte(set(c.predicates(bob, michel)), set([hates]))

            asserte(set(c.subject_objects(hates)),
                    set([(bob, pizza), (bob, michel)]))
            asserte(
                set(c.subject_objects(likes)),
                set([(tarek, cheese), (michel, cheese), (michel, pizza),
                     (bob, cheese), (tarek, pizza)]))
            asserte(set(c.predicate_objects(michel)),
                    set([(likes, cheese), (likes, pizza)]))
            asserte(set(c.predicate_objects(bob)),
                    set([(likes, cheese), (hates, pizza), (hates, michel)]))
            asserte(set(c.predicate_objects(tarek)),
                    set([(likes, cheese), (likes, pizza)]))

            asserte(set(c.subject_predicates(pizza)),
                    set([(bob, hates), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(cheese)),
                    set([(bob, likes), (tarek, likes), (michel, likes)]))
            asserte(set(c.subject_predicates(michel)), set([(bob, hates)]))

            asserte(
                set(c),
                set([(bob, hates, michel), (bob, likes, cheese),
                     (tarek, likes, pizza), (michel, likes, pizza),
                     (michel, likes, cheese), (bob, hates, pizza),
                     (tarek, likes, cheese)]))

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        asserte(len(list(c1triples((Any, Any, Any)))), 0)
        asserte(len(list(triples((Any, Any, Any)))), 0)
def convert(muldicat_csv):
    """Convert the MulDiCat spreadsheet (CSV) into a SKOS concept scheme.

    Reads the UTF-8 CSV at *muldicat_csv* row by row and returns an
    rdflib ConjunctiveGraph containing one skos:Concept per term, with
    per-language prefLabels, definitions, sources, altLabels and
    broader/narrower/related links derived from the BT/NT/RT markers.

    NOTE(review): relies on module-level names (SKOS, DCT, muldicat,
    description, XHTML, languages, make_id, unicode_csv_reader) defined
    elsewhere in this file.
    """
    g = ConjunctiveGraph()
    g.bind('skos', SKOS)
    g.bind('dct', DCT)

    # add concept scheme
    g.add((muldicat, RDF.type, SKOS.ConceptScheme))
    g.add((muldicat, DCT.title, Literal("Multilingual Dictionary of Cataloging Terms and Concepts", lang="en")))
    g.add((muldicat, DCT.description, Literal(description, datatype=XHTML)))
    g.add((muldicat, DCT.modified, Literal(datetime.date.today())))

    # work through each row of the spreadsheet, adding concepts as we go
    subject = None
    for row in unicode_csv_reader(codecs.open(muldicat_csv, encoding='utf-8')):
        # strip whitespace from row
        row = [cell.strip() for cell in row]

        # older version of the table had an unused ID column
        if len(row) == 8:
            print "popping"
            row.pop(0)

        if row[0] == 'Language':
            # header row
            continue
        elif row == [u'', u'', u'', u'', u'', u'', u'', u'']:
            # completely empty row
            continue
        else:
            lang, label, definition, see, see_also, source, modified = row
            lang = languages.get(lang, None)
            label = label.strip()
            # skip rows in unknown languages or with no label
            if not lang or not label:
                continue

            # use the english label to form part of the URI for the concept
            # hopefully not too controversial?
            if lang == 'en':
                subject = make_id(label)
                g.add((subject, RDF.type, SKOS.Concept))

            # NOTE(review): `subject` carries over from the most recent
            # English row, so non-English rows attach their labels to it.
            g.add((subject, SKOS.prefLabel, Literal(label, lang=lang)))
            g.add((subject, SKOS.inScheme, muldicat))

            if definition:
                g.add((subject, SKOS.definition, Literal(definition, lang=lang)))

            if source:
                g.add((subject, DCT.source, Literal(source, lang=lang)))

            if modified:
                date = datetime.datetime.strptime(modified, '%Y%m%d').date()
                # only record the latest last modification date for the concept
                existing_date = g.value(subject, DCT.modified)
                if not existing_date and date:
                    g.add((subject, DCT.modified, Literal(date)))
                elif date and existing_date and date > existing_date.toPython():
                    g.remove((subject, DCT.modified, existing_date))
                    g.add((subject, DCT.modified, Literal(date)))

            # "see" column holds comma-separated alternate labels
            for alt_label in see.split(','):
                if not alt_label:
                    continue
                alt_label = alt_label.strip()
                g.add((subject, SKOS.altLabel, Literal(alt_label, lang=lang)))

            # link up relations if we have the english label
            if lang == 'en' and see_also:
                for s in see_also.split(','):
                    s = s.strip()
                    # entries look like "Some Label [BT]"
                    match = re.match(r'(.*) \[(.*?)\]', s)
                    if not match:
                        continue
                    label, reltype = match.groups()
                    # some are formatted wrong
                    reltype = reltype.strip('[]')
                    object = make_id(label)
                    if reltype == 'BT':
                        g.add((subject, SKOS.broader, object))
                        g.add((object, SKOS.narrower, subject))
                    elif reltype == 'NT':
                        g.add((subject, SKOS.narrower, object))
                        g.add((object, SKOS.broader, subject))
                    elif reltype == 'RT':
                        g.add((subject, SKOS.related, object))
                        g.add((object, SKOS.related, subject))
                    else:
                        # unknown relation marker: fail loudly
                        raise RuntimeError(reltype)

    return g
def rdf_all(notation='xml'):
    """
    Function takes title of node, and rdf notation.

    Walks every NID node in the gstudio database, adds each node's
    non-excluded attributes as triples, serialises the graph in the
    requested *notation*, then commits and closes the graph.

    NOTE(review): depends on many module-level names (plugin, Store,
    Graph, NID, the various node model classes, link, link1,
    fstore_dump, NO_STORE, VALID_STORE, mkdtemp) defined elsewhere.
    """
    # Formats rdflib can serialise to; only informational here — the
    # value of `notation` is not validated against this list.
    valid_formats = ["xml", "n3", "ntriples", "trix"]
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"
    configString = "/var/tmp/rdfstore"

    # Get the IOMemory plugin.
    store = plugin.get('IOMemory', Store)('rdfstore')

    # Open previously created store, or create it if it doesn't exist yet
    graph = Graph(store="IOMemory", identifier = URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"

    # Now we'll add some triples to the graph & commit the changes
    graph.bind("gstudio", "http://gnowledge.org/")

    # Internal/bookkeeping fields that should not become predicates.
    exclusion_fields = ["id", "rght", "node_ptr_id", "image", "lft", "_state", "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"]

    for node in NID.objects.all():
        node_dict = node.ref.__dict__
        node_type = node.reftype
        try:
            # Re-fetch the node as its concrete model class.
            # NOTE(review): the local name `rdflib` shadows the rdflib
            # module inside this function — it is rebound to either
            # link(node) or a Namespace and later used as the namespace
            # for subjects/predicates.
            if (node_type == 'Gbobject'):
                node = Gbobject.objects.get(title=node)
            elif (node_type == 'None'):
                node = Gbobject.objects.get(title=node)
            elif (node_type == 'Processes'):
                node = Gbobject.objects.get(title=node)
            elif (node_type == 'System'):
                node = Gbobject.objects.get(title=node)
                rdflib = link(node)
                url_addr = link1(node)
                a = fstore_dump(url_addr)
            elif (node_type == 'Objecttype'):
                node = Objecttype.objects.get(title=node)
            elif (node_type == 'Attributetype'):
                node = Attributetype.objects.get(title=node)
            elif (node_type == 'Complement'):
                node = Complement.objects.get(title=node)
            elif (node_type == 'Union'):
                node = Union.objects.get(title=node)
            elif (node_type == 'Intersection'):
                node = Intersection.objects.get(title=node)
            elif (node_type == 'Expression'):
                node = Expression.objects.get(title=node)
            elif (node_type == 'Processtype'):
                node = Processtype.objects.get(title=node)
            elif (node_type == 'Systemtype'):
                node = Systemtype.objects.get(title=node)
            elif (node_type == 'AttributeSpecification'):
                node = AttributeSpecification.objects.get(title=node)
            elif (node_type == 'RelationSpecification'):
                node = RelationSpecification.objects.get(title=node)
                rdflib = link(node)
                url_addr = link1(node)
                a = fstore_dump(url_addr)

            if (node_type == 'Attribute'):
                node = Attribute.objects.get(title=node)
                rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
            elif (node_type == 'Relationtype'):
                node = Relationtype.objects.get(title=node)
                rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
            elif (node_type == 'Metatype'):
                node = Metatype.objects.get(title=node)
                rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
                url_addr = 'http://sbox.gnowledge.org/gstudio/'
                a = fstore_dump(url_addr)
        except:
            # NOTE(review): bare except silently swallows every failure
            # from the lookups above and falls back to a fixed namespace
            # for these three types only — consider narrowing this.
            if (node_type == 'Attribute'):
                rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
            if (node_type == 'Relationtype'):
                rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
            if (node_type == 'Metatype'):
                rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')

        # One triple per non-excluded attribute of the node.
        subject = str(node_dict['id'])
        for key in node_dict:
            if key not in exclusion_fields:
                predicate = str(key)
                pobject = str(node_dict[predicate])
                graph.add((rdflib[subject], rdflib[predicate], Literal(pobject)))

    rdf_code = graph.serialize(format=notation)
    # path to store the rdf in a file
    # x = os.path.join(os.path.dirname(__file__), 'rdffiles.rdf')
    graph.commit()
    graph.close()
from rdflib.term import Literal, BNode, URIRef from rdflib.graph import ConjunctiveGraph from rdflib.namespace import Namespace DC = Namespace(u"http://purl.org/dc/elements/1.1/") FOAF = Namespace(u"http://xmlns.com/foaf/0.1/") graph = ConjunctiveGraph() s = BNode() graph.add((s, FOAF['givenName'], Literal('Alice'))) b = BNode() graph.add((b, FOAF['givenName'], Literal('Bob'))) graph.add((b, DC['date'], Literal("2005-04-04T04:04:04Z"))) def test_bound(): res = list( graph.query("""PREFIX foaf: <http://xmlns.com/foaf/0.1/> PREFIX dc: <http://purl.org/dc/elements/1.1/> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> SELECT ?name WHERE { ?x foaf:givenName ?name . OPTIONAL { ?x dc:date ?date } . FILTER ( bound(?date) ) }""")) expected = [(Literal('Bob', lang=None, datatype=None), )] assert res == expected, "Expected %s but got %s" % (expected, res) if __name__ == '__main__': test_bound()
class Hisco2RDF():
    '''
    Scrapes the HISCO Web site

    The hierarchy goes as "master > minor > rubri > micro"

    Pages are fetched with requests and cached in a local SQLite
    database ('cache.db'), parsed with BeautifulSoup, and accumulated
    into an rdflib ConjunctiveGraph that save_output() writes to
    ../hisco.ttl in n3/Turtle.
    '''

    def __init__(self):
        # The graph to store the data
        self.graph = ConjunctiveGraph()
        self.graph.namespace_manager.bind('skos', SKOS)
        self.graph.namespace_manager.bind('hisco', HISCO)
        self.graph.namespace_manager.bind('dcterms', DCTERMS)
        self.graph.namespace_manager.bind('sdmx-dimension', SDMX_DIMENSION)
        self.graph.namespace_manager.bind('sdmx-code', SDMX_CODE)
        self.graph.namespace_manager.bind('qb', QB)

        # SQLite DB for the cache
        self.cache = sqlite3.connect('cache.db')
        cursor = self.cache.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS page (url text, html text)")
        self.cache.commit()

    def __del__(self):
        # Close the page cache when the scraper is garbage-collected.
        self.cache.close()

    def get_page(self, url):
        """Return a BeautifulSoup document for *url*, using the SQLite cache."""
        #log.debug("Load %s" % url)
        c = self.cache.cursor()
        c.execute("SELECT * FROM page WHERE url = ?", (url,))
        res = c.fetchone()
        doc = None
        if res == None:
            # Cache miss: fetch over HTTP and remember the raw content.
            doc = requests.get(url).content
            c.execute("INSERT INTO page VALUES (?,?)", (url, doc))
            self.cache.commit()
        else:
            (_, doc) = res
        return BeautifulSoup(doc)

    def save_output(self):
        """Add DataCube metadata for the occupation dimension and save the graph."""
        # Add more things needed for DataCubes
        dimprop = HISCO['occupation']
        self.graph.add((dimprop, RDF.type, QB['DimensionProperty']))
        self.graph.add((dimprop, RDFS.range, SKOS.Collection))
        self.graph.add((dimprop, QB['Concept'], SKOS.Collection))
        self.graph.add((dimprop, RDFS.label, Literal('Occupation code', lang='en')))
        self.graph.add((dimprop, RDFS.comment, Literal('The HISCO group of the occupation', lang='en')))

        # Print to the screen
        #outfile = sys.stdout.buffer
        #self.graph.serialize(destination=outfile, format='n3')

        # Save to the file
        outfile = open('../hisco.ttl', "wb")
        self.graph.serialize(destination=outfile, format='n3')
        outfile.close()

    def parse_hisco_tree(self):
        '''
        Parse the hisco tree

        Walks major groups, then minor, rubri and micro levels, adding
        each level to the graph and relating it (skos:related) to its
        parent level.
        '''
        # Load the page
        doc = self.get_page(ROOT + HISCO_TREE)

        # Find the major groups
        major_groups = []
        major_group = None
        for table in doc.find_all('table', attrs={'border': '0'}):
            for row in table.find_all('tr'):
                for col in row.find_all('td'):
                    # Skip empty rows
                    if len(col.text) == 1:
                        continue
                    # We are starting a new group
                    if col.text.startswith('Majorgroup'):
                        # Save the one we were building if any
                        if major_group != None:
                            major_groups.append(major_group)
                        m = re.search("Majorgroup ([^ ]*) ", col.text)
                        major_group = {}
                        major_group['title'] = col.text
                        # '/' appears in some codes; '-' keeps URIs clean
                        major_group['code'] = m.group(1).replace('/', '-')
                    # We have a description
                    if col.text.startswith('Workers'):
                        major_group['description'] = col.text
                    # We have links to minor
                    if col.text.startswith('List Minor'):
                        link = col.find_all('a')[0]['href']
                        major_group.setdefault('links', [])
                        major_group['links'].append(link)
        # Add the last group in the making
        if major_group != None:
            major_groups.append(major_group)

        # Add the groups to the graph
        for group in major_groups:
            major_group_uri = self._get_group_uri(group['code'])
            self.graph.add((major_group_uri, RDF.type, SKOS['ConceptScheme']))
            self.graph.add((major_group_uri, DCTERMS.title, Literal(group['title'])))
            self.graph.add((major_group_uri, DCTERMS.description, Literal(group['description'])))

        # Now move onto the minor groups following the links
        for major_group in major_groups:
            major_group_uri = self._get_group_uri(major_group['code'])
            for minor_link in major_group['links']:
                # Look for the minor groups
                minor_groups = self._parse_records_table(minor_link, 2)

                # Add the groups to the graph
                for minor_group in minor_groups:
                    minor_group_uri = self._get_group_uri(minor_group['code'])
                    self.graph.add((minor_group_uri, RDF.type, SKOS['ConceptScheme']))
                    self.graph.add((minor_group_uri, RDFS.label, Literal(minor_group['title'])))
                    self.graph.add((minor_group_uri, DCTERMS.description, Literal(minor_group['description'])))
                    self.graph.add((major_group_uri, SKOS.related, minor_group_uri))

                    # Got one level deeper into the rubri
                    for rubri_link in minor_group['links']:
                        # Look for the minor groups
                        rubri_groups = self._parse_records_table(rubri_link, 3)

                        # Add the groups to the graph
                        for rubri_group in rubri_groups:
                            rubri_group_uri = self._get_group_uri(rubri_group['code'])
                            self.graph.add((rubri_group_uri, RDF.type, SKOS['ConceptScheme']))
                            self.graph.add((rubri_group_uri, RDFS.label, Literal(rubri_group['title'])))
                            self.graph.add((rubri_group_uri, DCTERMS.description, Literal(rubri_group['description'])))
                            self.graph.add((minor_group_uri, SKOS.related, rubri_group_uri))

                            # And one deeper for the micro
                            for micro_link in rubri_group['links']:
                                # Look for the minor groups
                                micro_groups = self._parse_records_table(micro_link, 5)

                                # Add the groups to the graph
                                for micro_group in micro_groups:
                                    hisco_uri = self._get_hisco_uri(micro_group['code'])
                                    self.graph.add((hisco_uri, RDF.type, SKOS['Collection']))
                                    self.graph.add((hisco_uri, RDFS.label, Literal(micro_group['title'])))
                                    self.graph.add((hisco_uri, DCTERMS.description, Literal(micro_group['description'])))
                                    self.graph.add((rubri_group_uri, SKOS.related, hisco_uri))

    def parse_occupational_titles(self):
        '''
        Scrape the section of the site about occupational titles
        Last page = http://historyofwork.iisg.nl/list_hiswi.php?step=1845&publish=Y&modus=ftsearch
        '''
        parsed_status_page = set()
        next_page = OCCUPATIONAL_TITLES
        while next_page != None:
            log.info("Parse titles %s" % next_page)

            # Load the page
            doc = self.get_page(ROOT + next_page)

            # Find the right table
            table = doc.find('table', attrs={'cellspacing': '0', 'cellpadding': '2', 'border': '0'})

            # Look for all the titles
            for row in table.find_all('tr')[1:]:  # Skip the header
                cols = row.find_all('td')
                occupation_title = cols[1].text
                details_page_link = cols[1].find_all('a')[0]['href']
                language = LANG_MAP[cols[2].text]
                hisco_code = cols[3].text.replace('*', '')

                # Get the DB index from details_page_link
                m = re.search('know_id=([^&]*)&', details_page_link)
                occupation_index = m.group(1)

                # Add the concept to the graph
                resource = self._get_occupation_title_uri(occupation_index)
                self.graph.add((resource, RDF.type, SKOS['Concept']))
                self.graph.add((resource, SKOS.prefLabel, Literal(occupation_title, lang=language)))
                self.graph.add((resource, SKOS.member, self._get_hisco_uri(hisco_code)))

                # Get more information about the title and add it as a member of the collection
                details_page = self.get_page(ROOT + details_page_link)
                details_table = details_page.find('table', attrs={'cellspacing': '8', 'cellpadding': '0'})
                keyvalues = {}
                for details_row in details_table.find_all('tr'):
                    details_cols = details_row.find_all('td')
                    keyvalues[details_cols[0].text.strip()] = details_cols[-1]

                # We already dealt with these two
                del keyvalues['Hisco code']
                del keyvalues['Occupational title']

                # TODO Country , use refArea
                # TODO Language

                # Do we know the gender ?
                if 'Gender' in keyvalues:
                    sex = SDMX_CODE['sex-U']  # Also applies to "Male/Female"
                    if keyvalues['Gender'].text.strip() == 'Male':
                        sex = SDMX_CODE['sex-M']
                    elif keyvalues['Gender'].text.strip() == 'Female':
                        sex = SDMX_CODE['sex-F']
                    self.graph.add((resource, SDMX_DIMENSION['sex'], sex))
                    del keyvalues['Gender']

                # Do we know the status ?
                if 'Status' in keyvalues:
                    # Add the status
                    status = keyvalues['Status'].text.strip()
                    self.graph.add((resource, HISCO['status'], self._get_status_uri(status)))
                    # Parse the status page if necessary
                    status_page = keyvalues['Status'].find_all('a')[0]['href']
                    if status_page not in parsed_status_page:
                        self._parse_status_page(status_page)
                        parsed_status_page.add(status_page)
                    del keyvalues['Status']

                # TODO Relation
                # TODO Product
                # TODO Provenance

                # Do we have a translation in English ?
                if 'Translation' in keyvalues:
                    trans = Literal(keyvalues['Translation'].text.strip().replace('´', "'"), lang='en')
                    self.graph.add((resource, SKOS.altLabel, trans))
                    del keyvalues['Translation']

                # Print whatever is left
                #if len(keyvalues.keys()) != 0:
                #    log.info(keyvalues.keys())

            # Look for the "next" link
            next_table = doc.find('table', class_='nextprev')
            next_page = None
            for link in next_table.find_all('a'):
                if 'Next' in link.text:
                    next_page = link['href']

    def _parse_status_page(self, url):
        '''
        Parses a status page such as http://historyofwork.iisg.nl/status.php?int02=32
        '''
        # Work-around broken content
        if url == 'status.php?int02=15':
            return

        # Load the page
        doc = self.get_page(ROOT + url)

        # Find the data about this status
        status_uri = None
        for line in doc.find('pre').text.split('\n'):
            # "<code> <label>" lines start a new status entry
            if re.match("^[0-9]* [a-zA-Z]*", line):
                m = re.search("^([0-9]*) ([a-zA-Z]*)", line)
                status_uri = self._get_status_uri(m.group(1))
                self.graph.add((status_uri, RDF.type, HISCO['Status']))
                self.graph.add((status_uri, RDFS.label, Literal(m.group(2))))
                self.graph.add((status_uri, SKOS.prefLabel, Literal(m.group(2))))
                self.graph.add((status_uri, SKOS.notation, Literal(m.group(1))))
            # "XX:\t<label>" lines are translations of the current status
            if re.match("^[A-Z]{2}:\t[a-zA-Z]*", line):
                m = re.search("^([A-Z]{2}):\t([a-zA-Z]*)", line)
                lang_code = m.group(1).lower()
                label = Literal(m.group(2), lang = lang_code)
                self.graph.add((status_uri, SKOS.altLabel, label))

        # Describe the class
        status_class = HISCO['Status']
        descr = doc.find('table', attrs={'width': '600'}).text.strip().split('\r\n')
        self.graph.add((status_class, RDF.type, RDFS.Class))
        self.graph.add((status_class, RDFS.label, Literal("Status code")))
        self.graph.add((status_class, DCTERMS.comment, Literal(descr[1])))

        # Describe the property
        status_property = HISCO['status']
        self.graph.add((status_property, RDF.type, RDF.Property))
        self.graph.add((status_property, RDFS.label, Literal("status associated to the occupation")))
        self.graph.add((status_property, RDFS.range, HISCO['Status']))
        self.graph.add((status_property, RDFS.domain, SKOS.Concept))

    def _parse_records_table(self, url, size):
        '''
        Minor, Rubri and Micro have the same structure except an additional
        column for Micro with links to the titles

        *size* is the number of digits in the group code, used to detect
        the first cell of each record.
        '''
        # Load the page
        doc = self.get_page(ROOT + url)

        # Find the right table
        table = doc.find('table', attrs={'cellspacing': '8', 'cellpadding': '0'})

        # If we can't find the table return an empty list
        # work around for http://historyofwork.iisg.nl/list_micro.php?keywords=920&keywords_qt=lstrict
        if table == None:
            return []

        # Look for the minor groups
        groups = []
        group = None
        columns = table.find_all('td')
        for index in range(0, len(columns)):
            # New group
            if re.match("[0-9]{%d}" % size, columns[index].text):
                if group != None:
                    groups.append(group)
                group = {}
                group['code'] = columns[index].text
                group['title'] = columns[index + 1].text
                link = columns[index + 1].find_all('a')[0]['href']
                group.setdefault('links', [])
                group['links'].append(link)
                group['description'] = columns[index + 2].text
                if columns[index + 3].text == "Display Titles":
                    link = columns[index + 3].find_all('a')[0]['href']
                    group['titles_link'] = link
        # Flush the last record being built
        groups.append(group)

        return groups

    def _get_group_uri(self, code):
        # URI for a major/minor/rubri group
        return HISCO['group-%s' % code]

    def _get_hisco_uri(self, code):
        # URI for a micro-level HISCO collection
        return HISCO['hisco-%s' % code]

    def _get_occupation_title_uri(self, code):
        # URI for an individual occupational title
        return HISCO['occupation-%s' % code]

    def _get_status_uri(self, code):
        # URI for a status code
        return HISCO['status-%s' % code]
def make_rdf_graph(movies): mg = ConjunctiveGraph() mg.bind('fb', FB) mg.bind('dc', DC) for movie in movies: # Make a movie node movie_node = IVA_MOVIE[movie['id']] mg.add((movie_node, DC['title'], Literal(movie['title']))) # Make the director node, give it a name and link it to the movie dir_node = IVA_PERSON[movie['director']['id']] mg.add((movie_node, FB['film.film.directed_by'], dir_node)) mg.add((dir_node, DC['title'], Literal(movie['director']['name']))) for actor in movie['actors']: # The performance node is a blank node -- it has no URI performance = BNode() # The performance is connected to the actor and the movie actor_node = IVA_PERSON[actor['id']] mg.add((actor_node, DC['title'], Literal(actor['name']))) mg.add((performance, FB['film.performance.actor'], actor_node)) # If you had the name of the role, you could also add it to the # performance node, e.g. # mg.add((performance,FB['film.performance.role'],Literal('Carrie Bradshaw'))) mg.add((movie_node, FB['film.film.performances'], performance)) return mg
from rdflib.graph import ConjunctiveGraph from rdflib.namespace import Namespace, RDF, XSD from rdflib.term import BNode, Literal import rdflib DC = Namespace(u"http://purl.org/dc/elements/1.1/") FUNC = Namespace(u"http://example.org/functions#") graph = ConjunctiveGraph() graph.add((BNode(), RDF.value, Literal(0))) graph.add((BNode(), RDF.value, Literal(1))) graph.add((BNode(), RDF.value, Literal(2))) graph.add((BNode(), RDF.value, Literal(3))) from rdflib.term import _toPythonMapping NUMERIC_TYPES = [type_uri for type_uri in _toPythonMapping if \ _toPythonMapping[type_uri] in (int, float, long)] def func_even(a): # Should this be required, or be done automatically? from rdfextras.sparql.sparqlOperators import getValue value = getValue(a) if isinstance(value, Literal) and value.datatype in NUMERIC_TYPES: return Literal(int(value.toPython() % 2 == 0), datatype=XSD.boolean) else: raise TypeError(a)
class TestLevelDBConjunctiveGraphCore(unittest.TestCase):
    """Core ConjunctiveGraph behaviour on the LevelDB store backend.

    Each test opens a freshly created store at ``configString``;
    tearDown destroys the store and removes whatever is left on disk
    (directory of files, or a single file).
    """

    def setUp(self):
        store = "LevelDB"
        self.graph = ConjunctiveGraph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def tearDown(self):
        # Destroy the store, then best-effort close (the store may
        # already be unusable after destroy) and clean up on-disk state.
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except:
            pass
        if getattr(self, 'path', False) and self.path is not None:
            if os.path.exists(self.path):
                if os.path.isdir(self.path):
                    for f in os.listdir(self.path):
                        os.unlink(self.path + '/' + f)
                    os.rmdir(self.path)
                elif len(self.path.split(':')) == 1:
                    os.unlink(self.path)
                else:
                    os.remove(self.path)

    def test_namespaces(self):
        """Bound prefixes show up in graph.namespaces()."""
        # Fixed: the dc URI was "http://http://purl.org/dc/elements/1.1/"
        # (doubled scheme) — bind the correct namespace.
        self.graph.bind("dc", "http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        self.assert_(len(list(self.graph.namespaces())) == 5)
        self.assert_(('foaf', rdflib.term.URIRef(u'http://xmlns.com/foaf/0.1/'))
                     in list(self.graph.namespaces()))

    def test_readable_index(self):
        """readable_index() renders an index number without raising."""
        print(readable_index(111))

    def test_triples_context_reset(self):
        """triples() with the store as context still sees all triples."""
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        ntriples = self.graph.triples((None, None, None), context=self.graph.store)
        self.assert_(len(list(ntriples)) == 2)

    def test_remove_context_reset(self):
        """Removing via the store API drops exactly the targeted triple."""
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.remove((michel, likes, cheese), self.graph.store)
        self.graph.commit()
        self.assert_(
            len(list(self.graph.triples((None, None, None),
                                        context=self.graph.store))) == 1)

    def test_remove_db_exception(self):
        """store.__len__ with the store as context must not disturb data."""
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.__len__(context=self.graph.store)
        self.assert_(
            len(list(self.graph.triples((None, None, None),
                                        context=self.graph.store))) == 2)
def to_RDF(records, base_namespace, lang_codes=None,skosxl=False): """ docstring for as_RDF """ from rdflib import Namespace, BNode, Literal, URIRef,RDF,RDFS from rdflib.graph import Graph, ConjunctiveGraph from rdflib.plugins.memory import IOMemory print >> sys.stderr, base_namespace store = IOMemory() g = ConjunctiveGraph(store=store) skos = Namespace('http://www.w3.org/2004/02/skos/core#') skosxl = Namespace('http://www.w3.org/2008/05/skos-xl#') base = Namespace(base_namespace) g.bind('skos',skos) g.bind('skosxl',skosxl) g.bind('base',base) scheme_label = "schemes/1" thesaurus = URIRef(base[scheme_label]) g.add((thesaurus,RDF.type, skos["ConceptScheme"])) for n,record in enumerate(records): label_counter = 1 try: if(record is not None): uri = URIRef(base["%s/concepts/%i"%(scheme_label,int(record['id']))]) g.add((uri, RDF.type, skos['Concept'])) g.add((uri,skos["inScheme"],thesaurus)) if(record['broader_id'] is not None): broader_uri = URIRef(base["%s/concepts/%i"%(scheme_label,int(record['broader_id']))]) g.add((uri,skos['broader'],broader_uri)) g.add((broader_uri,skos['narrower'],uri)) else: g.add((uri,skos["topConceptOf"],thesaurus)) if(record['hidden_label'] is not None): if(skosxl): label_uri = URIRef("%s#l%i"%(uri,label_counter)) g.add((label_uri,RDF.type,skosxl["Label"])) g.add((label_uri,skosxl["literalForm"],Literal(record['hidden_label']))) g.add((uri,skosxl["hiddenLabel"],label_uri)) label_counter += 1 else: g.add((uri,skos["hiddenLabel"],Literal(record['hidden_label']))) if(record['labels'] is not None): # when transforming into SKOS-XL append the hiddenLabel to the preferredLabel@de # of a given term. This way it becomes possible to use the hiddenLabel to distinguish # between concepts with the same label but different provenance (i.e. they are found # within different branches of the same thesaurus tree. 
if(skosxl): label_uri = URIRef("%s#l%i"%(uri,label_counter)) g.add((label_uri,RDF.type,skosxl["Label"])) g.add((label_uri,skosxl["literalForm"],Literal("%s (%s)"%(record['labels']["ger"],record['hidden_label']),lang=lang_codes["ger"]))) g.add((uri,skosxl["prefLabel"],label_uri)) label_counter += 1 for lang in record['labels'].keys(): if(skosxl): label_uri = URIRef("%s#l%i"%(uri,label_counter)) g.add((label_uri,RDF.type,skosxl["Label"])) g.add((label_uri,skosxl["literalForm"],Literal(record['labels'][lang],lang=lang_codes[lang]))) g.add((uri,skosxl["prefLabel"],label_uri)) label_counter += 1 else: g.add((uri,skos["prefLabel"],Literal(record['labels'][lang],lang=lang_codes[lang]))) if(record['anon_nodes'] is not None): for node_id,node in record['anon_nodes']: temp = URIRef(base["%s/concepts/%s"%(scheme_label,node_id)]) print >> sys.stderr, temp g.add((temp,RDF.type,skos['Concept'])) g.add((temp,skos["inScheme"],thesaurus)) g.add((temp,skos['broader'],uri)) if(skosxl): label_uri = URIRef("%s#l%i"%(temp,label_counter)) g.add((label_uri,RDF.type,skosxl["Label"])) g.add((label_uri,skosxl["literalForm"],Literal(node,lang="de"))) g.add((temp,skosxl["prefLabel"],label_uri)) label_counter += 1 # added extra preferredLabel@de with hiddenLabel betwen brackets label_uri = URIRef("%s#l%i"%(temp,label_counter)) g.add((label_uri,RDF.type,skosxl["Label"])) g.add((label_uri,skosxl["literalForm"],Literal("%s (%s)"%(node,record['hidden_label']),lang="de"))) g.add((temp,skosxl["prefLabel"],label_uri)) label_counter += 1 else: g.add((temp,skos["prefLabel"],Literal(node,lang="de"))) print >> sys.stderr, "Record %s converted into RDF (%i/%i)"%(record['id'],n,len(records)) except Exception, e: print >> sys.stderr, "Failed converting record %s with error %s (%i/%i)"%(record['id'],str(e),n,len(records))
def query_lode(self, id):
    """Map LODE/DBpedia relations for the thinker *id* back to InPhO.

    Loads the InPhO LODE dump, finds the owl:sameAs links between InPhO
    thinkers and DBpedia resources, queries DBpedia for each mapped
    property of the requested thinker, and returns a serialized graph
    of the relations not already present on the local entity.

    NOTE(review): depends on names defined elsewhere (config, c, h,
    Entity, SPARQLWrapper, JSON, csv).
    """
    var = "http://inpho.cogs.indiana.edu/thinker/" + id
    # initialize dictionaries to store temporray results
    dbPropResults = {}
    inpho_DB = {}
    DB_inpho = {}
    dbpedia_web = {}
    triples = {}

    # init graphs for LODE and mapped data
    gLODE = ConjunctiveGraph()
    gReturn = ConjunctiveGraph()

    # import InPhO data
    gLODE.parse("http://inphodata.cogs.indiana.edu/lode/out_n3.20140207.rdf", format="n3")

    # builds a set of triples with the inpho id as the first entry and the
    # dbpedia id as the second
    resultsLODE = gLODE.query("""
        SELECT ?thinker_LODE ?thinkerDB
        WHERE {
            ?thinker_LODE owl:sameAs ?thinkerDB
            FILTER (regex(str(?thinker_LODE),"http://inpho.cogs.indiana.edu","i") && regex(str(?thinkerDB),"http://dbpedia.org/resource/","i")).
        }
        """)

    # load in property mapping between inpho-dbpedia
    # rows are: <inpho property URI>\t<prefixed dbpedia property>\t<local attr name>
    prop_map_filename = config.get_data_path('rdf_map.txt')
    with open(prop_map_filename, 'r') as f:
        dbprops = csv.reader(f, delimiter='\t')
        for dbprop in dbprops:
            dbPropResults[dbprop[1]] = dbprop[0]
            dbpedia_web[dbprop[1].split(":")[1]] = dbprop[2]

    # iterate through triples and store mappings
    for triple in resultsLODE:
        inpho_DB[str(triple[0])] = str(triple[1])  # store the results in key as inpho url and value as dbpedia url
        DB_inpho[str(triple[1])] = str(triple[0])  # store the results in key as dbpedia url and value as inpho url

    # queries for all relationships in dbpedia
    sparqlDB = SPARQLWrapper("http://inpho-dataserve.cogs.indiana.edu:8890/sparql/")
    sparqlDB.setReturnFormat(JSON)
    for inpho, DB in inpho_DB.iteritems():
        predicate = {}
        #for dbprop in dbPropResults:
        # Only query DBpedia for the thinker we were asked about.
        if(str(DB_inpho.get(DB)) == var):
            for dbprop in dbPropResults:
                sparqlDB.setQuery("""
                    PREFIX dbpprop: <http://dbpedia.org/ontology/>
                    SELECT ?b WHERE {
                        <""" + DB + """> """ + dbprop + """ ?b.
                        FILTER (regex(str(?b),"dbpedia.org/resource/","i")).
                    }""")
                resultsDB = sparqlDB.query().convert()
                predicate[dbprop] = resultsDB["results"]["bindings"]
            triples[DB] = predicate

    # retrieve native python object
    c.entity = h.fetch_obj(Entity, id, new_id=True)

    # Collect "<relation>:<wiki id>" strings already present on the
    # local entity so we don't re-add known relations below.
    existing_predicate_list = []
    existing_object_list = []
    predicates_to_compare = ['influenced', 'influenced_by', 'teachers', 'students']
    for subject, predicate in triples.iteritems():
        for predicate1, objectn in predicate.iteritems():
            predicate_to_match = predicate1.split(":")[1]
            attr = getattr(c.entity, dbpedia_web[predicate_to_match])
            for attr1 in attr:
                if(dbpedia_web[predicate_to_match] in predicates_to_compare):
                    existing_predicate_list.append(dbpedia_web[predicate_to_match] + ':' + attr1.wiki)

    # maps from dbpedia relationships back to inpho relationships
    for subject, predicate in triples.iteritems():
        #attr = getattr(c.entity, predicate)
        #raise Exception
        for predicate1, objectn in predicate.iteritems():
            for object1 in objectn:
                # "<relation>:<last path segment of the dbpedia URL>"
                #temp_str=dbpedia_web[predicate1.split(":")[1]] + ':'+str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/"))-1].replace("_"," ")
                temp_str = dbpedia_web[predicate1.split(":")[1]] + ':' + str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/")) - 1]
                # raise Exception
                if temp_str not in existing_predicate_list:
                    # returns the inphoid for the object
                    DB_Entry = DB_inpho.get(object1['b']['value'])  # reverse lookup for the inpho data check
                    # if there is not an inpho id, leave it as the dbpedia id
                    if(DB_Entry == None):
                        gReturn.add((URIRef(subject), URIRef(dbPropResults.get(predicate1)), URIRef(object1['b']['value'])))
                    else:
                        # return the properly mapped id
                        # TODO: use attr to filter DB_Entry
                        gReturn.add((URIRef(subject), URIRef(dbPropResults.get(predicate1)), URIRef(DB_Entry)))
                # if "Francisco" in str(object1['b']['value']).split("/")[len(str(object1['b']['value']).split("/"))-1].replace("_", ):
                #     raise Exception
    return gReturn.serialize();
class ContextTest(test.TestCase):
    """
    Testing different contexts against the "Django" rdflib store.

    Heavily based on
    https://github.com/RDFLib/rdflib-postgresql/blob/master/test/context_case.py
    """
    store_name = "Django"
    storetest = True
    path = ""        # store open/destroy configuration string (empty for this store)
    create = True

    # fixture terms used throughout the tests
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self):
        # fresh store per test: destroy anything left over, then (re)create
        self.graph = ConjunctiveGraph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        self.graph.destroy(self.path)
        self.graph.close()

    def get_context(self, identifier):
        """Return a context Graph bound to this test's store."""
        assert isinstance(identifier, URIRef) or \
            isinstance(identifier, BNode), type(identifier)
        # NOTE(review): namespace_manager=self passes the TestCase itself,
        # not a NamespaceManager — presumably never exercised; confirm.
        return Graph(store=self.graph.store, identifier=identifier,
                     namespace_manager=self)

    def addStuff(self):
        # seven fixture triples, all in context c1
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))

    def removeStuff(self):
        # exact inverse of addStuff()
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))

    def addStuffInMultipleContexts(self):
        # one identical triple added to the default context, c1 and c2
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)
        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        # c1 holds two triples; conjunctive length counts distinct triples
        self.assertEquals(len(graph), 2)
        self.assertEquals(len(self.graph), 2)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)
        for _ in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEquals(len(graph), oldLen + 10)
        self.assertEquals(len(self.get_context(c1)), oldLen + 10)
        self.graph.remove_context(self.get_context(c1))
        self.assertEquals(len(self.graph), oldLen)
        self.assertEquals(len(graph), 0)

    def testLenInMultipleContexts(self):
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()
        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEquals(len(self.graph), oldLen + 1)
        graph = Graph(self.graph.store, self.c1)
        self.assertEquals(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)
        self.addStuffInMultipleContexts()
        # triple should be still in store after removing it from c1 + c2
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertNotIn(triple, self.graph)
        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertNotIn(triple, self.graph)

    def testContexts(self):
        triple = (self.pizza, self.hates, self.tarek)
        self.addStuffInMultipleContexts()

        def cid(c):
            # contexts() may yield Graphs or bare identifiers; normalize
            if not isinstance(c, basestring):
                return c.identifier
            return c
        self.assertIn(self.c1, [cid(c) for c in self.graph.contexts()])
        self.assertIn(self.c2, [cid(c) for c in self.graph.contexts()])

        contextList = [cid(c) for c in self.graph.contexts(triple)]
        self.assertIn(self.c1, contextList)
        self.assertIn(self.c2, contextList)

    def testRemoveContext(self):
        c1 = self.c1
        self.addStuffInMultipleContexts()
        self.assertEquals(len(Graph(self.graph.store, c1)), 1)
        self.assertEquals(len(self.get_context(c1)), 1)
        self.graph.remove_context(self.get_context(c1))
        self.assert_(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEquals(len(self.graph), 0)

    def testTriples(self):
        # exhaustive triples()/accessor checks, with and without context;
        # since all fixture data lives in c1, results must match either way
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, self.c1)
        c1triples = c1graph.triples
        Any = None
        self.addStuff()

        # unbound subjects with context
        self.assertEquals(len(list(c1triples((Any, self.likes, self.pizza)))), 2)
        self.assertEquals(len(list(c1triples((Any, self.hates, self.pizza)))), 1)
        self.assertEquals(len(list(c1triples((Any, self.likes, self.cheese)))), 3)
        self.assertEquals(len(list(c1triples((Any, self.hates, self.cheese)))), 0)

        # unbound subjects without context, same results!
        self.assertEquals(len(list(triples((Any, self.likes, self.pizza)))), 2)
        self.assertEquals(len(list(triples((Any, self.hates, self.pizza)))), 1)
        self.assertEquals(len(list(triples((Any, self.likes, self.cheese)))), 3)
        self.assertEquals(len(list(triples((Any, self.hates, self.cheese)))), 0)

        # unbound objects with context
        self.assertEquals(len(list(c1triples((self.michel, self.likes, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.tarek, self.likes, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.bob, self.hates, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.bob, self.likes, Any)))), 1)

        # unbound objects without context, same results!
        self.assertEquals(len(list(triples((self.michel, self.likes, Any)))), 2)
        self.assertEquals(len(list(triples((self.tarek, self.likes, Any)))), 2)
        self.assertEquals(len(list(triples((self.bob, self.hates, Any)))), 2)
        self.assertEquals(len(list(triples((self.bob, self.likes, Any)))), 1)

        # unbound predicates with context
        self.assertEquals(len(list(c1triples((self.michel, Any, self.cheese)))), 1)
        self.assertEquals(len(list(c1triples((self.tarek, Any, self.cheese)))), 1)
        self.assertEquals(len(list(c1triples((self.bob, Any, self.pizza)))), 1)
        self.assertEquals(len(list(c1triples((self.bob, Any, self.michel)))), 1)

        # unbound predicates without context, same results!
        self.assertEquals(len(list(triples((self.michel, Any, self.cheese)))), 1)
        self.assertEquals(len(list(triples((self.tarek, Any, self.cheese)))), 1)
        self.assertEquals(len(list(triples((self.bob, Any, self.pizza)))), 1)
        self.assertEquals(len(list(triples((self.bob, Any, self.michel)))), 1)

        # unbound subject, objects with context
        self.assertEquals(len(list(c1triples((Any, self.hates, Any)))), 2)
        self.assertEquals(len(list(c1triples((Any, self.likes, Any)))), 5)

        # unbound subject, objects without context, same results!
        self.assertEquals(len(list(triples((Any, self.hates, Any)))), 2)
        self.assertEquals(len(list(triples((Any, self.likes, Any)))), 5)

        # unbound predicates, objects with context
        self.assertEquals(len(list(c1triples((self.michel, Any, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.bob, Any, Any)))), 3)
        self.assertEquals(len(list(c1triples((self.tarek, Any, Any)))), 2)

        # unbound predicates, objects without context, same results!
        self.assertEquals(len(list(triples((self.michel, Any, Any)))), 2)
        self.assertEquals(len(list(triples((self.bob, Any, Any)))), 3)
        self.assertEquals(len(list(triples((self.tarek, Any, Any)))), 2)

        # unbound subjects, predicates with context
        self.assertEquals(len(list(c1triples((Any, Any, self.pizza)))), 3)
        self.assertEquals(len(list(c1triples((Any, Any, self.cheese)))), 3)
        self.assertEquals(len(list(c1triples((Any, Any, self.michel)))), 1)

        # unbound subjects, predicates without context, same results!
        self.assertEquals(len(list(triples((Any, Any, self.pizza)))), 3)
        self.assertEquals(len(list(triples((Any, Any, self.cheese)))), 3)
        self.assertEquals(len(list(triples((Any, Any, self.michel)))), 1)

        # all unbound with context
        self.assertEquals(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        self.assertEquals(len(list(triples((Any, Any, Any)))), 7)

        # the same checks through the Graph convenience accessors,
        # on both the conjunctive graph and the c1 context
        for c in [graph, self.get_context(self.c1)]:
            # unbound subjects
            self.assertEquals(set(c.subjects(self.likes, self.pizza)),
                              {self.michel, self.tarek})
            self.assertEquals(set(c.subjects(self.hates, self.pizza)),
                              {self.bob})
            self.assertEquals(set(c.subjects(self.likes, self.cheese)),
                              {self.tarek, self.bob, self.michel})
            self.assertEquals(set(c.subjects(self.hates, self.cheese)), set())

            # unbound objects
            self.assertEquals(set(c.objects(self.michel, self.likes)),
                              {self.cheese, self.pizza})
            self.assertEquals(set(c.objects(self.tarek, self.likes)),
                              {self.cheese, self.pizza})
            self.assertEquals(set(c.objects(self.bob, self.hates)),
                              {self.michel, self.pizza})
            self.assertEquals(set(c.objects(self.bob, self.likes)),
                              {self.cheese})

            # unbound predicates
            self.assertEquals(set(c.predicates(self.michel, self.cheese)),
                              {self.likes})
            self.assertEquals(set(c.predicates(self.tarek, self.cheese)),
                              {self.likes})
            self.assertEquals(set(c.predicates(self.bob, self.pizza)),
                              {self.hates})
            self.assertEquals(set(c.predicates(self.bob, self.michel)),
                              {self.hates})

            self.assertEquals(set(c.subject_objects(self.hates)),
                              {(self.bob, self.pizza), (self.bob, self.michel)})
            self.assertEquals(set(c.subject_objects(self.likes)),
                              {(self.tarek, self.cheese),
                               (self.michel, self.cheese),
                               (self.michel, self.pizza),
                               (self.bob, self.cheese),
                               (self.tarek, self.pizza)})

            self.assertEquals(set(c.predicate_objects(self.michel)),
                              {(self.likes, self.cheese),
                               (self.likes, self.pizza)})
            self.assertEquals(set(c.predicate_objects(self.bob)),
                              {(self.likes, self.cheese),
                               (self.hates, self.pizza),
                               (self.hates, self.michel)})
            self.assertEquals(set(c.predicate_objects(self.tarek)),
                              {(self.likes, self.cheese),
                               (self.likes, self.pizza)})

            self.assertEquals(set(c.subject_predicates(self.pizza)),
                              {(self.bob, self.hates),
                               (self.tarek, self.likes),
                               (self.michel, self.likes)})
            self.assertEquals(set(c.subject_predicates(self.cheese)),
                              {(self.bob, self.likes),
                               (self.tarek, self.likes),
                               (self.michel, self.likes)})
            self.assertEquals(set(c.subject_predicates(self.michel)),
                              {(self.bob, self.hates)})

            self.assertEquals(set(c),
                              {(self.bob, self.hates, self.michel),
                               (self.bob, self.likes, self.cheese),
                               (self.tarek, self.likes, self.pizza),
                               (self.michel, self.likes, self.pizza),
                               (self.michel, self.likes, self.cheese),
                               (self.bob, self.hates, self.pizza),
                               (self.tarek, self.likes, self.cheese)})

        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        self.assertEquals(len(list(c1triples((Any, Any, Any)))), 0)
        self.assertEquals(len(list(triples((Any, Any, Any)))), 0)
# Load up RDFLib from rdflib import * from rdflib.graph import ConjunctiveGraph from rdflib.namespace import Namespace from rdflib.term import URIRef from rdflib.parser import StringInputSource # Firstly, it doesn't have to be so complex. # Here we create a "Graph" of our work. # Think of it as a blank piece of graph paper! primer = ConjunctiveGraph() myNS = Namespace('#') primer.add((myNS.pat, myNS.knows, myNS.jo)) # or: primer.add((myNS['pat'], myNS['age'], long(24))) # Now, with just that, lets see how the system # recorded *way* too many details about what # you just asserted as fact. # from pprint import pprint pprint(list(primer)) # just think .whatever((s, p, o)) # here we report on what we know pprint(list(primer.subjects()))
default_graph_uri = "http://id.southampton.ac.uk/dataset/places" configString = # Get the mysql plugin. You may have to install the python mysql libraries store = plugin.get('MySQL', Store)('rdfstore') # Open previously created store, or create it if it doesn't exist yet rt = store.open(configString,create=False) if rt == NO_STORE: # There is no underlying MySQL infrastructure, create it store.open(configString,create=True) else: assert rt == VALID_STORE,"There underlying store is corrupted" # There is a store, use it graph = Graph(store, identifier = URIRef(default_graph_uri)) print("Triples in graph before add: %s" % len(graph)) # Now we'll add some triples to the graph & commit the changes rdflib = Namespace('http://rdflib.net/test/') graph.add((rdflib['pic:1'], rdflib['name'], Literal('Jane & Bob'))) graph.add((rdflib['pic:2'], rdflib['name'], Literal('Squirrel in Tree'))) graph.commit() print("Triples in graph after add: %" % len(graph)) # display the graph in RDF/XML print(graph.serialize())
def locationtoturtle(ellist, meta): rdf=Graph(); cs = Namespace("http://cs.unibo.it/ontology/") colon=Namespace("http://www.essepuntato.it/resource/") dcterms=Namespace("http://purl.org/dc/terms/") xsd=Namespace("http://www.w3.org/2001/XMLSchema#") this=Namespace("http://vitali.web.cs.unibo.it/twiki/pub/TechWeb12/DataSource2/posteBO2011.ttl#") vcard = Namespace("http://www.w3.org/2006/vcard/ns#") rdf.bind("vcard", vcard) rdf.bind("cs", cs) rdf.bind("", colon) rdf.bind("dcterms", dcterms) rdf.bind("xsd", xsd) rdf.bind("this", this) rdf.add((this["metadata"], dcterms["creator"], Literal(meta.creator))) rdf.add((this["metadata"], dcterms["created"], Literal(meta.created,datatype=XSD.date))) rdf.add((this["metadata"], dcterms["description"], Literal(meta.version))) rdf.add((this["metadata"], dcterms["valid"], Literal(meta.valid,datatype=XSD.date))) rdf.add((this["metadata"], dcterms["source"], Literal(meta.source))) for location in ellist: rdf.add((colon[location.id], vcard["fn"], Literal(location.name))) rdf.add((colon[location.id], vcard["extended-address"], Literal(location.address))) rdf.add((colon[location.id], vcard["category"], Literal(location.category))) rdf.add((colon[location.id], vcard["latitude"], Literal(location.lat))) rdf.add((colon[location.id], vcard["longitude"], Literal(location.long))) if(location.tel): rdf.add((colon[location.id], vcard["tel"], Literal(location.tel))) if(location.note): rdf.add((colon[location.id], vcard["note"], Literal(location.note))) rdf.add((colon[location.id], cs["opening"], Literal(location.opening))) rdf.add((colon[location.id], cs["closing"], Literal(location.closing))) print("Content-type: text/turtle; charset=UTF-8\n") print rdf.serialize(format="n3")
def make_property_graph(properties, args):
    """Collect OWL object/annotation/datatype properties from a fixed set of
    ontologies into a new graph, add Monarch-specific hardcoded properties,
    and retarget the dataset ontology URI from args.input to args.output.

    :param properties: passed through to add_property_to_graph (filter set)
    :param args: namespace with .input and .output file paths
    :return: ConjunctiveGraph of property declarations
    """
    graph = ConjunctiveGraph()
    output_graph = ConjunctiveGraph()
    ontologies = [
        'https://raw.githubusercontent.com/monarch-initiative/SEPIO-ontology/master/src/ontology/sepio.owl',
        'https://raw.githubusercontent.com/monarch-initiative/GENO-ontology/develop/src/ontology/geno.owl',
        'http://purl.obolibrary.org/obo/ro.owl',
        'http://purl.obolibrary.org/obo/iao.owl',
        'http://purl.obolibrary.org/obo/ero.owl',
        'https://raw.githubusercontent.com/jamesmalone/OBAN/master/ontology/oban_core.ttl',
        'http://purl.obolibrary.org/obo/pco.owl',
        'http://purl.obolibrary.org/obo/xco.owl'
    ]

    for ontology in ontologies:
        print("parsing: " + ontology)
        try:
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))
        except SAXParseException as e:
            # mis-detected format; most of these fall back cleanly to turtle
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format="turtle")
        except OSError as e:  # URLError:
            # simple retry on transient network failure
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))

    # Get object properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['ObjectProperty']),
        output_graph, OWL['ObjectProperty'], properties)

    # Get annotation properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['AnnotationProperty']),
        output_graph, OWL['AnnotationProperty'], properties)

    # Get data properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['DatatypeProperty']),
        output_graph, OWL['DatatypeProperty'], properties)

    # Hardcoded properties
    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_cliqueLeader'),
         RDF['type'], OWL['AnnotationProperty']))
    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_anonymous'),
         RDF['type'], OWL['AnnotationProperty']))

    # Check monarch data triple
    data_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.input))
    new_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.output))
    if (URIRef(data_url), RDF.type, OWL['Ontology']) in output_graph:
        # BUG FIX: Graph.remove() takes a single triple tuple; passing the
        # three terms as separate arguments raises TypeError in rdflib.
        output_graph.remove((URIRef(data_url), RDF.type, OWL['Ontology']))
    output_graph.add((URIRef(new_url), RDF.type, OWL['Ontology']))

    # dc:source must be declared as an ObjectProperty, not AnnotationProperty
    for row in output_graph.predicates(DC['source'], OWL['AnnotationProperty']):
        if row == RDF['type']:
            output_graph.remove(
                (DC['source'], RDF['type'], OWL['AnnotationProperty']))
            output_graph.add((DC['source'], RDF['type'], OWL['ObjectProperty']))

    return output_graph
class InMemoryStorage(object):
    """In-memory RDF store wrapper around an rdflib ConjunctiveGraph.

    Most methods accept an optional context which may be a string key into
    the module-level `graph_dict` or a URIRef naming the context directly;
    with no context they operate on the whole conjunctive graph.
    """

    def __init__(self):
        store = IOMemory()
        self.g = ConjunctiveGraph(store=store)
        self.g.bind("lada",ns_lada)
        self.g.bind('data', ns_data)
        self.g.bind('cube', ns_cube)
        self.g.bind('qb', ns_cube)
        self.g.bind('lcd', ns_lcd)
        self.g.bind('xsd', ns_xsd)
        self.g.bind('qb4cc', ns_qb4cc)
        self.g.bind('skos', ns_skos)
        # prefix -> namespace map handed to query(); NOTE(review): binds
        # both 'cube' and 'qb' above but only 'qb' appears here — confirm
        # 'cube' is not needed in queries.
        self.initNs = {'lada': ns_lada, 'data': ns_data, 'qb': ns_cube,
                       'lcd': ns_lcd, 'xsd': ns_xsd, 'qb4cc': ns_qb4cc,
                       'skos': ns_skos}

    def _concatenate_graphs(self, graphs):
        """Union the named contexts into a NEW standalone Graph (a copy —
        mutations of the result do not touch the store)."""
        source = Graph()
        for g in graphs:
            if g in graph_dict:
                source += self.g.get_context(graph_dict[g])
            elif type(g) is URIRef:
                source += self.g.get_context(g)
        return source

    def add_triple(self, triple, context):
        # add one triple to the given context (or the default graph)
        if context:
            if type(context) is str:
                self.g.get_context(graph_dict[context]).add(triple)
            else:
                self.g.get_context(context).add(triple)
        else:
            self.g.add(triple)

    def add_graph(self, graph, context):
        # merge an entire graph into the given context (or the default graph)
        if context:
            g = None
            if type(context) is str:
                g = self.g.get_context(graph_dict[context])
            else:
                g = self.g.get_context(context)
            g += graph
        else:
            self.g += graph

    def add_file(self, file, format, context):
        # parse an RDF file into the given context (or the default graph)
        if context:
            if type(context) is str:
                self.g.get_context(graph_dict[context]).parse(file, format=format)
            else:
                self.g.get_context(context).parse(file, format=format)
        else:
            self.g.parse(file, format=format)

    def query(self, queryString, contexts):
        """Run a SPARQL query against a list of contexts (unioned), one
        context, or the whole store."""
        if contexts:
            if type(contexts) is list:
                return self._concatenate_graphs(contexts).query(queryString, initNs=self.initNs)
            elif type(contexts) is str:
                return self.g.get_context(graph_dict[contexts]).query(queryString, initNs=self.initNs)
            else:
                return self.g.get_context(contexts).query(queryString, initNs=self.initNs)
        else:
            return self.g.query(queryString, initNs=self.initNs)

    def value(self, subject, predicate, context):
        # single-value lookup, scoped to a context if given
        if context:
            if type(context) is str:
                return self.g.get_context(graph_dict[context]).value(subject, predicate)
            else:
                return self.g.get_context(context).value(subject, predicate)
        else:
            return self.g.value(subject, predicate)

    def remove(self, triple_pattern, contexts):
        # NOTE(review): for a list of contexts this removes from the COPY
        # returned by _concatenate_graphs, so the store itself is untouched —
        # looks like a bug; confirm intended semantics before relying on it.
        if contexts:
            if type(contexts) is list:
                self._concatenate_graphs(contexts).remove(triple_pattern)
            else:
                self.g.get_context(graph_dict[contexts]).remove(triple_pattern)
        else:
            self.g.remove(triple_pattern)

    def clear(self, context):
        # drop a whole context, or wipe every triple in the store
        if context:
            if type(context) is str:
                self.g.remove_context(self.g.get_context(graph_dict[context]))
            else:
                self.g.remove_context(self.g.get_context(context))
        else:
            self.g.remove((None, None, None))

    def count_triples(self):
        # total number of triples across all contexts
        c = 0;
        for s, p, o in self.g:
            c = c +1;
        return c

    def export(self, context):
        # serialize one named context to "<context>.ttl";
        # NOTE(review): silently does nothing unless context is a str key.
        if type(context) is str:
            self.g.get_context(graph_dict[context]).serialize(context + ".ttl", format="turtle")
def query_lode(self, id):
    """Map DBpedia relationships for one InPhO thinker back into InPhO terms.

    Loads the InPhO LODE dump, collects owl:sameAs links between InPhO
    thinker URIs and DBpedia resources, queries a DBpedia mirror for every
    mapped property of the thinker identified by *id*, filters out
    relationships the local entity already records, and returns the rest
    as a serialized RDF graph.

    :param id: InPhO thinker id (string); NOTE: shadows the builtin ``id``
        but is part of the public signature, so it is kept.
    :return: serialized RDF graph from rdflib ``serialize()``.
    """
    var = "http://inpho.cogs.indiana.edu/thinker/" + id
    # temporary result maps
    dbPropResults = {}  # dbpedia property -> inpho property URI (rdf_map.txt)
    inpho_DB = {}       # inpho thinker URL -> dbpedia resource URL
    DB_inpho = {}       # dbpedia resource URL -> inpho thinker URL
    dbpedia_web = {}    # dbpedia property local name -> entity attribute name
    triples = {}        # dbpedia resource URL -> {property: SPARQL bindings}

    # init graphs for LODE input and mapped output
    gLODE = ConjunctiveGraph()
    gReturn = ConjunctiveGraph()

    # import InPhO data (network fetch of the full LODE dump)
    gLODE.parse("http://inphodata.cogs.indiana.edu/lode/out_n3.20140207.rdf", format="n3")

    # builds a set of triples with the inpho id as the first entry and the
    # dbpedia id as the second
    resultsLODE = gLODE.query(
        """
        SELECT ?thinker_LODE ?thinkerDB WHERE {
        ?thinker_LODE owl:sameAs ?thinkerDB
        FILTER (regex(str(?thinker_LODE),"http://inpho.cogs.indiana.edu","i")
             && regex(str(?thinkerDB),"http://dbpedia.org/resource/","i")).
        }
        """
    )

    # load in property mapping between inpho-dbpedia (tab-separated:
    # inpho property, dbpedia property, local attribute name)
    prop_map_filename = config.get_data_path("rdf_map.txt")
    with open(prop_map_filename, "r") as f:
        dbprops = csv.reader(f, delimiter="\t")
        for dbprop in dbprops:
            dbPropResults[dbprop[1]] = dbprop[0]
            dbpedia_web[dbprop[1].split(":")[1]] = dbprop[2]

    # iterate through triples and store the bidirectional mapping
    for triple in resultsLODE:
        inpho_DB[str(triple[0])] = str(triple[1])
        DB_inpho[str(triple[1])] = str(triple[0])

    # queries for all relationships in dbpedia, one query per mapped
    # property, restricted to the thinker identified by `var`
    sparqlDB = SPARQLWrapper("http://inpho-dataserve.cogs.indiana.edu:8890/sparql/")
    sparqlDB.setReturnFormat(JSON)
    for inpho, DB in inpho_DB.iteritems():
        predicate = {}
        if str(DB_inpho.get(DB)) == var:
            for dbprop in dbPropResults:
                sparqlDB.setQuery(
                    """
                    PREFIX dbpprop: <http://dbpedia.org/ontology/>
                    SELECT ?b WHERE {
                    <""" + DB + """> """ + dbprop + """ ?b.
                    FILTER (regex(str(?b),"dbpedia.org/resource/","i")).
                    }"""
                )
                resultsDB = sparqlDB.query().convert()
                predicate[dbprop] = resultsDB["results"]["bindings"]
            triples[DB] = predicate

    # retrieve native python object
    c.entity = h.fetch_obj(Entity, id, new_id=True)
    existing_predicate_list = []
    predicates_to_compare = ["influenced", "influenced_by", "teachers", "students"]

    # collect "<relation>:<wiki>" keys already present on the entity so the
    # returned graph only contains new relationships
    for subject, predicate in triples.iteritems():
        for predicate1, objectn in predicate.iteritems():
            predicate_to_match = predicate1.split(":")[1]
            attr = getattr(c.entity, dbpedia_web[predicate_to_match])
            for attr1 in attr:
                if dbpedia_web[predicate_to_match] in predicates_to_compare:
                    existing_predicate_list.append(dbpedia_web[predicate_to_match] + ":" + attr1.wiki)

    # maps from dbpedia relationships back to inpho relationships
    for subject, predicate in triples.iteritems():
        for predicate1, objectn in predicate.iteritems():
            for object1 in objectn:
                # hoisted: the object URL and its last path segment were
                # recomputed several times per iteration
                obj_url = str(object1["b"]["value"])
                obj_tail = obj_url.split("/")[-1]
                temp_str = dbpedia_web[predicate1.split(":")[1]] + ":" + obj_tail
                if temp_str not in existing_predicate_list:
                    # reverse lookup: inpho id for the object, if one exists
                    DB_Entry = DB_inpho.get(obj_url)
                    # FIX: identity comparison (`is None`), not `== None`
                    if DB_Entry is None:
                        # no inpho id — keep the dbpedia id
                        gReturn.add((URIRef(subject), URIRef(dbPropResults.get(predicate1)), URIRef(obj_url)))
                    else:
                        # return the properly mapped id
                        # TODO: use attr to filter DB_Entry
                        gReturn.add((URIRef(subject), URIRef(dbPropResults.get(predicate1)), URIRef(DB_Entry)))

    return gReturn.serialize()
data = csv.DictReader(fd, delimiter="\t", quotechar='"', escapechar='') for r in data: raw_id = r['raw_id'] # Check if valid with regex match = re.match(r"^(tt)*(?P<id>\d{7,10}).*", raw_id) if not match: progress.count() wrongs.append(raw_id) continue imdb_id = match.group(2) film_node = n['Movie/tt' + imdb_id] # Create a node for dbpedia uri = r['uri'] wiki_node = URIRef(uri) g.add((film_node, n['has' + source + 'Node'], wiki_node)) progress.count() if progress.finished(): break g.serialize(destination=outfile, format='turtle') end = time.time() print('Wrong formatted IMDB IDs found: ', len(wrongs)) print(wrongs) print("Total Items Processed: ", progress.total) print("Total Time: ", end - start) g.close()
def encode_container(self, bundle, container=None, identifier=None):
    """Encode a PROV bundle's records into an RDF graph.

    :param bundle: PROV bundle whose namespaces and ``_records`` are encoded
    :param container: existing graph to add to; a new ConjunctiveGraph is
        created when None
    :param identifier: identifier for a newly created container graph
    :return: the populated ``container`` graph

    NOTE(review): this chunk was recovered from a whitespace-collapsed
    source; the nesting below was reconstructed and should be checked
    against the original (it follows the structure of the prov package's
    RDF serializer, which this code closely mirrors).
    """
    if container is None:
        container = ConjunctiveGraph(identifier=identifier)
        nm = container.namespace_manager
        nm.bind('prov', PROV.uri)

    # register the bundle's prefixes on the output graph
    for namespace in bundle.namespaces:
        container.bind(namespace.prefix, namespace.uri)

    # records without an identifier get a stable anonymous id
    id_generator = AnonymousIDGenerator()
    real_or_anon_id = lambda record: record._identifier.uri if \
        record._identifier else id_generator.get_anon_id(record)

    for record in bundle._records:
        rec_type = record.get_type()
        if hasattr(record, 'identifier') and record.identifier:
            identifier = URIRef(six.text_type(real_or_anon_id(record)))
            container.add((identifier, RDF.type, URIRef(rec_type.uri)))
        else:
            identifier = None
        if record.attributes:
            bnode = None
            formal_objects = []
            used_objects = []
            all_attributes = list(record.formal_attributes) + list(
                record.attributes)
            # a relation needs a qualified form when any formal attribute
            # beyond the first pair carries a value
            formal_qualifiers = False
            for attrid, (attr, value) in enumerate(
                    list(record.formal_attributes)):
                if (identifier is not None and value is not None) or \
                        (identifier is None and value is not None and attrid > 1):
                    formal_qualifiers = True
            has_qualifiers = len(
                record.extra_attributes) > 0 or formal_qualifiers
            for idx, (attr, value) in enumerate(all_attributes):
                if record.is_relation():
                    # predicate for the binary (unqualified) relation
                    if rec_type.namespace.prefix == 'prov':
                        pred = URIRef(PROV[PROV_N_MAP[rec_type]].uri)
                    else:
                        pred = URIRef(PROVONE[PROVONE_N_MAP[rec_type]].uri)
                    # create bnode relation
                    if bnode is None:
                        valid_formal_indices = set()
                        for idx, (key, val) in enumerate(
                                record.formal_attributes):
                            formal_objects.append(key)
                            if val:
                                valid_formal_indices.add(idx)
                        used_objects = [record.formal_attributes[0][0]]
                        subj = None
                        if record.formal_attributes[0][1]:
                            subj = URIRef(
                                record.formal_attributes[0][1].uri)
                        if identifier is None and subj is not None:
                            try:
                                obj_val = record.formal_attributes[1][1]
                                obj_attr = URIRef(
                                    record.formal_attributes[1][0].uri)
                                # TODO: Why is obj_attr above not used anywhere?
                            except IndexError:
                                obj_val = None
                            # emit the simple subject-predicate-object form
                            # when no qualified pattern is required
                            if obj_val and (rec_type not in {
                                    PROV_END, PROV_START, PROV_USAGE,
                                    PROV_GENERATION, PROV_DERIVATION,
                                    PROV_ASSOCIATION, PROV_INVALIDATION
                            } or (valid_formal_indices == {0, 1} and
                                  len(record.extra_attributes) == 0)):
                                used_objects.append(
                                    record.formal_attributes[1][0])
                                obj_val = self.encode_rdf_representation(
                                    obj_val)
                                if rec_type == PROV_ALTERNATE:
                                    subj, obj_val = obj_val, subj
                                container.add((subj, pred, obj_val))
                                if rec_type == PROV_MENTION:
                                    if record.formal_attributes[2][1]:
                                        used_objects.append(
                                            record.formal_attributes[2][0])
                                        obj_val = self.encode_rdf_representation(
                                            record.formal_attributes[2][1])
                                        container.add(
                                            (subj, URIRef(
                                                PROV['asInBundle'].uri), obj_val))
                                has_qualifiers = False
                        if rec_type in [PROV_ALTERNATE]:
                            continue
                        if subj and (has_qualifiers or identifier):
                            # emit the qualified pattern: subj prov:qualifiedX id
                            qualifier = rec_type._localpart
                            rec_uri = rec_type.uri
                            for attr_name, val in record.extra_attributes:
                                if attr_name == PROV['type']:
                                    if PROV['Revision'] == val or \
                                            PROV['Quotation'] == val or \
                                            PROV['PrimarySource'] == val:
                                        qualifier = val._localpart
                                        rec_uri = val.uri
                                        if identifier is not None:
                                            container.remove(
                                                (identifier, RDF.type,
                                                 URIRef(rec_type.uri)))
                            QRole = URIRef(PROV['qualified' + qualifier].uri)
                            if identifier is not None:
                                container.add((subj, QRole, identifier))
                            else:
                                bnode = identifier = BNode()
                                container.add((subj, QRole, identifier))
                                container.add(
                                    (identifier, RDF.type, URIRef(rec_uri)))
                                # reset identifier to BNode
                    if value is not None and attr not in used_objects:
                        # map the PROV attribute to the proper RDF predicate
                        if attr in formal_objects:
                            pred = attr2rdf(attr)
                        elif attr == PROV['role']:
                            pred = URIRef(PROV['hadRole'].uri)
                        elif attr == PROV['plan']:
                            pred = URIRef(PROV['hadPlan'].uri)
                        elif attr == PROV['type']:
                            pred = RDF.type
                        elif attr == PROV['label']:
                            pred = RDFS.label
                        elif isinstance(attr, pm.QualifiedName):
                            pred = URIRef(attr.uri)
                        else:
                            pred = self.encode_rdf_representation(attr)
                        # record-type-specific predicate rewrites
                        if PROV['plan'].uri in pred:
                            pred = URIRef(PROV['hadPlan'].uri)
                        if PROV['informant'].uri in pred:
                            pred = URIRef(PROV['activity'].uri)
                        if PROV['responsible'].uri in pred:
                            pred = URIRef(PROV['agent'].uri)
                        if rec_type == PROV_DELEGATION and \
                                PROV['activity'].uri in pred:
                            pred = URIRef(PROV['hadActivity'].uri)
                        if (rec_type in [PROV_END, PROV_START] and
                                PROV['trigger'].uri in pred) or \
                                (rec_type in [PROV_USAGE] and
                                 PROV['used'].uri in pred):
                            pred = URIRef(PROV['entity'].uri)
                        if rec_type in [
                                PROV_GENERATION, PROV_END, PROV_START,
                                PROV_USAGE, PROV_INVALIDATION
                        ]:
                            if PROV['time'].uri in pred:
                                pred = URIRef(PROV['atTime'].uri)
                            if PROV['ender'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['starter'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['location'].uri in pred:
                                pred = URIRef(PROV['atLocation'].uri)
                        if rec_type in [PROV_ACTIVITY]:
                            if PROV_ATTR_STARTTIME in pred:
                                pred = URIRef(PROV['startedAtTime'].uri)
                            if PROV_ATTR_ENDTIME in pred:
                                pred = URIRef(PROV['endedAtTime'].uri)
                        if rec_type == PROV_DERIVATION:
                            if PROV['activity'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['generation'].uri in pred:
                                pred = URIRef(PROV['hadGeneration'].uri)
                            if PROV['usage'].uri in pred:
                                pred = URIRef(PROV['hadUsage'].uri)
                            if PROV['usedEntity'].uri in pred:
                                pred = URIRef(PROV['entity'].uri)
                        container.add(
                            (identifier, pred,
                             self.encode_rdf_representation(value)))
                    continue
                # non-relation records below this point
                if value is None:
                    continue
                if isinstance(value, pm.ProvRecord):
                    obj = URIRef(six.text_type(real_or_anon_id(value)))
                else:
                    # Assuming this is a datetime value
                    obj = self.encode_rdf_representation(value)
                if attr == PROV['location']:
                    pred = URIRef(PROV['atLocation'].uri)
                    # NOTE(review): `if False and ...` permanently disables
                    # this branch — dead code kept as found.
                    if False and isinstance(value, (URIRef, pm.QualifiedName)):
                        if isinstance(value, pm.QualifiedName):
                            value = URIRef(value.uri)
                        container.add((identifier, pred, value))
                    else:
                        container.add(
                            (identifier, pred,
                             self.encode_rdf_representation(obj)))
                    continue
                if attr == PROV['type']:
                    pred = RDF.type
                elif attr == PROV['label']:
                    pred = RDFS.label
                elif attr == PROV_ATTR_STARTTIME:
                    pred = URIRef(PROV['startedAtTime'].uri)
                elif attr == PROV_ATTR_ENDTIME:
                    pred = URIRef(PROV['endedAtTime'].uri)
                else:
                    pred = self.encode_rdf_representation(attr)
                container.add((identifier, pred, obj))
    return container
def build_graph(self): graph = ConjunctiveGraph() graph.bind('sioc', SIOC) graph.bind('foaf', FOAF) graph.bind('rdfs', RDFS) graph.bind('dct', DCT) graph.bind('mvcb', MVCB) swaml = URIRef("http://swaml.berlios.de/doap#swaml") doc = URIRef(self.base) graph.add((doc, RDF.type, FOAF["Document"])) graph.add((doc, RDFS.label, Literal("RDF version of the message '%s' retrieved from MarkMail API" % self.key))) #FIXME: this should go out of this api graph.add((doc, MVCB.generatorAgent, swaml)) message = URIRef(self.get_uri()) graph.add((message, RDF.type, SIOC.Post)) graph.add((message, RDF.type, SIOCT.MailMessage)) graph.add((doc, FOAF.primaryTopic, message)) graph.add((message, SIOC.id, Literal(self.key))) graph.add((message, SIOC.link, URIRef("http://markmail.org/message/%s" % self.key))) #graph.add((message, SIOC.has_container,URIRef(self.config.get('base')+'forum'))) #graph.add((message, SIOC.has_creator, URIRef(self.getSender().getUri()))) graph.add((message, DCT.title, Literal(self.title))) #graph.add((message, DCT.created, Literal(self.getDate(), datatype=XSD[u'dateTime']))) graph.add((message, SIOC.content, Literal(self.content))) self.set_graph(graph)
def make_property_graph(properties, args):
    """Collect the property declarations of a fixed set of ontologies.

    Parses each ontology, copies its object / annotation / datatype
    property declarations into a fresh graph (via ``add_property_to_graph``,
    filtered by *properties*), adds two hard-coded Monarch annotation
    properties, and rewrites the ``owl:Ontology`` header triple from the
    input file name to the output file name.

    :param properties: property URIs handed through to
        ``add_property_to_graph`` (controls what gets copied).
    :param args: argparse namespace; ``args.input`` / ``args.output`` are
        used to derive the data.monarchinitiative.org ontology URLs.
    :return: ConjunctiveGraph with the collected property declarations.

    Fix over the previous version: ``Graph.remove()`` takes a single
    (s, p, o) tuple; the old code passed three separate arguments, which
    raises a TypeError in rdflib.
    """
    graph = ConjunctiveGraph()
    output_graph = ConjunctiveGraph()

    GH = 'https://raw.githubusercontent.com'
    OBO = 'https://purl.obolibrary.org/obo'
    ontologies = [
        OBO + '/sepio.owl',
        OBO + '/geno.owl',
        OBO + '/iao.owl',
        OBO + '/ero.owl',
        OBO + '/pco.owl',
        OBO + '/xco.owl',
        OBO + '/ro.owl',
        GH + '/jamesmalone/OBAN/master/ontology/oban_core.ttl',
    ]

    for ontology in ontologies:
        print("parsing: " + ontology)
        try:
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))
        except SAXParseException as e:
            # Guessed format was wrong (not RDF/XML) — retry as turtle.
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format="turtle")
        except OSError as e:  # URLError:
            # simple retry on a transient network failure
            logger.error(e)
            logger.error('Retrying: ' + ontology)
            graph.parse(ontology, format=rdflib_util.guess_format(ontology))

    # Get object properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['ObjectProperty']),
        output_graph, OWL['ObjectProperty'], properties)

    # Get annotation properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['AnnotationProperty']),
        output_graph, OWL['AnnotationProperty'], properties)

    # Get data properties
    output_graph = add_property_to_graph(
        graph.subjects(RDF['type'], OWL['DatatypeProperty']),
        output_graph, OWL['DatatypeProperty'], properties)

    # Hardcoded properties
    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_cliqueLeader'),
         RDF['type'], OWL['AnnotationProperty']))

    output_graph.add(
        (URIRef('https://monarchinitiative.org/MONARCH_anonymous'),
         RDF['type'], OWL['AnnotationProperty']))

    # Check monarch data triple: replace the input-file ontology header
    # with one named after the output file.
    data_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.input))
    new_url = "https://data.monarchinitiative.org/ttl/{0}".format(
        re.sub(r".*/", "", args.output))

    if (URIRef(data_url), RDF.type, OWL['Ontology']) in output_graph:
        # BUG FIX: remove() expects one triple tuple, not three arguments.
        output_graph.remove((URIRef(data_url), RDF.type, OWL['Ontology']))
    output_graph.add((URIRef(new_url), RDF.type, OWL['Ontology']))

    # dc:source must be declared an object property, not an annotation one.
    for row in output_graph.predicates(DC['source'], OWL['AnnotationProperty']):
        if row == RDF['type']:
            output_graph.remove(
                (DC['source'], RDF['type'], OWL['AnnotationProperty']))
            output_graph.add(
                (DC['source'], RDF['type'], OWL['ObjectProperty']))

    return output_graph
def rdf_description(name, notation='xml'):
    """Print the RDF serialization of the node whose title is *name*.

    Looks the node up by title, resolves its concrete model class from
    ``NID.reftype``, adds one triple per (non-excluded) model field and
    prints the graph serialized in the requested notation.

    :param name: node title (as stored on ``NID.title``).
    :param notation: rdflib output format, e.g. "xml", "n3",
        "ntriples" or "trix".
    """
    default_graph_uri = "http://gstudio.gnowledge.org/rdfstore"

    # Open previously created store, or create it if it doesn't exist yet,
    # in a fresh temporary directory.
    graph = Graph(store="IOMemory", identifier=URIRef(default_graph_uri))
    path = mkdtemp()
    rt = graph.open(path, create=False)
    if rt == NO_STORE:
        graph.open(path, create=True)
    else:
        assert rt == VALID_STORE, "The underlying store is corrupt"

    graph.bind("gstudio", "http://gnowledge.org/")

    # Internal/bookkeeping model fields that must not become predicates.
    exclusion_fields = [
        "id", "rght", "node_ptr_id", "image", "lft", "_state",
        "_altnames_cache", "_tags_cache", "nid_ptr_id", "_mptt_cached_fields"
    ]

    # reftype -> model whose namespace comes from link(node).
    linked_models = {
        'Gbobject': Gbobject,
        'None': Gbobject,
        'Processes': Gbobject,
        'System': Gbobject,
        'Objecttype': Objecttype,
        'Attributetype': Attributetype,
        'Complement': Complement,
        'Union': Union,
        'Intersection': Intersection,
        'Expression': Expression,
        'Processtype': Processtype,
        'Systemtype': Systemtype,
        'AttributeSpecification': AttributeSpecification,
        'RelationSpecification': RelationSpecification,
    }
    # reftype -> model that uses the fixed gstudio namespace instead.
    namespace_models = {
        'Attribute': Attribute,
        'Relationtype': Relationtype,
        'Metatype': Metatype,
    }

    # Verify the type of node, refetch it as the concrete model and pick
    # the namespace accordingly (replaces a 17-branch if/elif chain).
    node = NID.objects.get(title=name)
    node_type = node.reftype
    if node_type in linked_models:
        node = linked_models[node_type].objects.get(title=name)
        rdflib = link(node)
    elif node_type in namespace_models:
        node = namespace_models[node_type].objects.get(title=name)
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')
    else:
        # Unknown type: keep the NID instance, use the default namespace.
        rdflib = Namespace('http://sbox.gnowledge.org/gstudio/')

    node_dict = node.__dict__
    subject = str(node_dict['id'])
    for key in node_dict:
        if key not in exclusion_fields:
            predicate = str(key)
            pobject = str(node_dict[predicate])
            graph.add((rdflib[subject], rdflib[predicate], Literal(pobject)))

    rdf_code = graph.serialize(format=notation)
    graph.commit()
    # print(x) with a single argument behaves identically under Python 2
    # (replaces the py2-only "print rdf_code" statement).
    print(rdf_code)
    graph.close()
def build_graph(self):
    """Build the SIOC RDF graph describing this MarkMail thread and its
    messages, then hand it to set_graph().

    Fix over the previous version: ``Literal(len(self.messages),
    XSD.Integer)`` passed the datatype in the *lang* positional slot of
    ``Literal(value, lang, datatype)`` and used a non-existent datatype
    term; it is now ``datatype=XSD.integer`` (matching the correct
    ``datatype=XSD.dateTime`` usage below).
    """
    graph = ConjunctiveGraph()
    graph.bind('sioc', SIOC)
    graph.bind('foaf', FOAF)
    graph.bind('rdfs', RDFS)
    graph.bind('dct', DCT)
    graph.bind('mvcb', MVCB)

    swaml = URIRef("http://swaml.berlios.de/doap#swaml")

    # The document describing the thread.
    doc = URIRef("%s/thread/%s" % (self.base, self.key))
    graph.add((doc, RDF.type, FOAF["Document"]))
    graph.add((doc, RDFS.label, Literal("RDF version of the thread '%s' retrieved from MarkMail API" % self.key)))
    # FIXME: this should go out of this api
    graph.add((doc, MVCB.generatorAgent, swaml))

    # The thread itself.
    thread = URIRef("%s/thread/%s#thread" % (self.base, self.key))
    graph.add((thread, RDF.type, SIOC["Thread"]))
    graph.add((doc, FOAF["primaryTopic"], thread))
    graph.add((thread, SIOC.id, Literal(self.key)))
    graph.add((thread, SIOC.link, URIRef(self.homepage)))
    graph.add((thread, DCT.title, Literal(self.title)))
    graph.add((thread, SIOC.num_item,
               Literal(len(self.messages), datatype=XSD.integer)))

    # One sioc:Post per message, linked to the thread both ways.
    for message in self.messages:
        url = "%s/message/%s" % (self.base, message["id"])
        post = URIRef("%s#message" % url)
        graph.add((post, RDF.type, SIOC.Post))
        graph.add((post, RDFS.seeAlso, URIRef(url)))
        graph.add((thread, SIOC.container_of, post))
        graph.add((post, SIOC.has_container, thread))
        # NOTE(review): every post gets the *thread* key as its sioc:id;
        # message["id"] looks like the intended value — confirm upstream.
        graph.add((post, SIOC.id, Literal(self.key)))
        graph.add((post, SIOC.link,
                   URIRef("http://markmail.org%s" % message["url"])))
        # The author is modelled as a blank-node sioc:UserAccount.
        author = BNode()
        graph.add((post, SIOC.has_creator, author))
        graph.add((author, RDF.type, SIOC.UserAccount))
        graph.add((author, SIOC.name, Literal(message["from"])))
        graph.add((post, DCT.created,
                   Literal(message["date"], datatype=XSD.dateTime)))

    self.set_graph(graph)
class Hisco2RDF():
    '''
    Scrapes the HISCO Web site.
    The hierarchy goes as "master > minor > rubri > micro".

    Fetched pages are cached in a local SQLite database ('cache.db') so
    that re-runs do not hit the server again; the resulting SKOS/DataCube
    graph is written to ../hisco.ttl by save_output().
    '''
    def __init__(self):
        # The graph to store the data
        self.graph = ConjunctiveGraph()
        self.graph.namespace_manager.bind('skos', SKOS)
        self.graph.namespace_manager.bind('hisco', HISCO)
        self.graph.namespace_manager.bind('dcterms', DCTERMS)
        self.graph.namespace_manager.bind('sdmx-dimension', SDMX_DIMENSION)
        self.graph.namespace_manager.bind('sdmx-code', SDMX_CODE)
        self.graph.namespace_manager.bind('qb', QB)
        # SQLite DB for the cache of downloaded pages: (url, html)
        self.cache = sqlite3.connect('cache.db')
        cursor = self.cache.cursor()
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS page (url text, html text)")
        self.cache.commit()

    def __del__(self):
        # Release the page-cache DB handle
        self.cache.close()

    def get_page(self, url):
        '''Return *url* parsed with BeautifulSoup, downloading on a cache
        miss and serving the cached HTML otherwise.'''
        #log.debug("Load %s" % url)
        c = self.cache.cursor()
        c.execute("SELECT * FROM page WHERE url = ?", (url, ))
        res = c.fetchone()
        doc = None
        if res == None:
            # Not cached yet: fetch and remember the raw content
            doc = requests.get(url).content
            c.execute("INSERT INTO page VALUES (?,?)", (url, doc))
            self.cache.commit()
        else:
            (_, doc) = res
        return BeautifulSoup(doc)

    def save_output(self):
        '''Add the DataCube dimension-property triples and serialize the
        whole graph to ../hisco.ttl (N3).'''
        # Add more things needed for DataCubes
        dimprop = HISCO['occupation']
        self.graph.add((dimprop, RDF.type, QB['DimensionProperty']))
        self.graph.add((dimprop, RDFS.range, SKOS.Collection))
        self.graph.add((dimprop, QB['Concept'], SKOS.Collection))
        self.graph.add(
            (dimprop, RDFS.label, Literal('Occupation code', lang='en')))
        self.graph.add((dimprop, RDFS.comment,
                        Literal('The HISCO group of the occupation', lang='en')))
        # Print to the screen
        #outfile = sys.stdout.buffer
        #self.graph.serialize(destination=outfile, format='n3')
        # Save to the file
        outfile = open('../hisco.ttl', "wb")
        self.graph.serialize(destination=outfile, format='n3')
        outfile.close()

    def parse_hisco_tree(self):
        ''' Parse the hisco tree: major groups from the root page, then
        minor > rubri > micro groups by following the links. '''
        # Load the page
        doc = self.get_page(ROOT + HISCO_TREE)
        # Find the major groups
        major_groups = []
        major_group = None
        for table in doc.find_all('table', attrs={'border': '0'}):
            for row in table.find_all('tr'):
                for col in row.find_all('td'):
                    # Skip empty rows
                    if len(col.text) == 1:
                        continue
                    # We are starting a new group
                    if col.text.startswith('Majorgroup'):
                        # Save the one we were building if any
                        if major_group != None:
                            major_groups.append(major_group)
                        m = re.search("Majorgroup ([^ ]*) ", col.text)
                        major_group = {}
                        major_group['title'] = col.text
                        # Codes like "0/1" become "0-1" so they fit in URIs
                        major_group['code'] = m.group(1).replace('/', '-')
                    # We have a description
                    if col.text.startswith('Workers'):
                        major_group['description'] = col.text
                    # We have links to minor
                    if col.text.startswith('List Minor'):
                        link = col.find_all('a')[0]['href']
                        major_group.setdefault('links', [])
                        major_group['links'].append(link)
        # Add the last group in the making
        if major_group != None:
            major_groups.append(major_group)
        # Add the groups to the graph
        for group in major_groups:
            major_group_uri = self._get_group_uri(group['code'])
            self.graph.add((major_group_uri, RDF.type, SKOS['ConceptScheme']))
            self.graph.add(
                (major_group_uri, DCTERMS.title, Literal(group['title'])))
            self.graph.add((major_group_uri, DCTERMS.description,
                            Literal(group['description'])))
        # Now move onto the minor groups following the links
        for major_group in major_groups:
            major_group_uri = self._get_group_uri(major_group['code'])
            for minor_link in major_group['links']:
                # Look for the minor groups (records start with 2 digits)
                minor_groups = self._parse_records_table(minor_link, 2)
                # Add the groups to the graph
                for minor_group in minor_groups:
                    minor_group_uri = self._get_group_uri(minor_group['code'])
                    self.graph.add(
                        (minor_group_uri, RDF.type, SKOS['ConceptScheme']))
                    self.graph.add((minor_group_uri, RDFS.label,
                                    Literal(minor_group['title'])))
                    self.graph.add((minor_group_uri, DCTERMS.description,
                                    Literal(minor_group['description'])))
                    self.graph.add(
                        (major_group_uri, SKOS.related, minor_group_uri))
                    # Got one level deeper into the rubri (3 digits)
                    for rubri_link in minor_group['links']:
                        # Look for the rubri groups
                        rubri_groups = self._parse_records_table(rubri_link, 3)
                        # Add the groups to the graph
                        for rubri_group in rubri_groups:
                            rubri_group_uri = self._get_group_uri(
                                rubri_group['code'])
                            self.graph.add((rubri_group_uri, RDF.type,
                                            SKOS['ConceptScheme']))
                            self.graph.add((rubri_group_uri, RDFS.label,
                                            Literal(rubri_group['title'])))
                            self.graph.add(
                                (rubri_group_uri, DCTERMS.description,
                                 Literal(rubri_group['description'])))
                            self.graph.add((minor_group_uri, SKOS.related,
                                            rubri_group_uri))
                            # And one deeper for the micro (5 digits)
                            for micro_link in rubri_group['links']:
                                # Look for the micro groups
                                micro_groups = self._parse_records_table(
                                    micro_link, 5)
                                # Add the groups to the graph
                                for micro_group in micro_groups:
                                    hisco_uri = self._get_hisco_uri(
                                        micro_group['code'])
                                    self.graph.add((hisco_uri, RDF.type,
                                                    SKOS['Collection']))
                                    self.graph.add(
                                        (hisco_uri, RDFS.label,
                                         Literal(micro_group['title'])))
                                    self.graph.add(
                                        (hisco_uri, DCTERMS.description,
                                         Literal(micro_group['description'])))
                                    self.graph.add((rubri_group_uri,
                                                    SKOS.related, hisco_uri))

    def parse_occupational_titles(self):
        ''' Scrape the section of the site about occupational titles.
        Last page = http://historyofwork.iisg.nl/list_hiswi.php?step=1845&publish=Y&modus=ftsearch '''
        # Status pages already processed (each is parsed only once)
        parsed_status_page = set()
        next_page = OCCUPATIONAL_TITLES
        while next_page != None:
            log.info("Parse titles %s" % next_page)
            # Load the page
            doc = self.get_page(ROOT + next_page)
            # Find the right table
            table = doc.find('table', attrs={
                'cellspacing': '0',
                'cellpadding': '2',
                'border': '0'
            })
            # Look for all the titles
            for row in table.find_all('tr')[1:]:  # Skip the header
                cols = row.find_all('td')
                occupation_title = cols[1].text
                details_page_link = cols[1].find_all('a')[0]['href']
                language = LANG_MAP[cols[2].text]
                hisco_code = cols[3].text.replace('*', '')
                # Get the DB index from details_page_link
                m = re.search('know_id=([^&]*)&', details_page_link)
                occupation_index = m.group(1)
                # Add the concept to the graph
                resource = self._get_occupation_title_uri(occupation_index)
                self.graph.add((resource, RDF.type, SKOS['Concept']))
                self.graph.add((resource, SKOS.prefLabel,
                                Literal(occupation_title, lang=language)))
                self.graph.add(
                    (resource, SKOS.member, self._get_hisco_uri(hisco_code)))
                # Get more information about the title and add it as a member of the collection
                details_page = self.get_page(ROOT + details_page_link)
                details_table = details_page.find('table', attrs={
                    'cellspacing': '8',
                    'cellpadding': '0'
                })
                keyvalues = {}
                for details_row in details_table.find_all('tr'):
                    details_cols = details_row.find_all('td')
                    keyvalues[details_cols[0].text.strip()] = details_cols[-1]
                # We already dealt with these two
                del keyvalues['Hisco code']
                del keyvalues['Occupational title']
                # TODO Country , use refArea
                # TODO Language
                # Do we know the gender ?
                if 'Gender' in keyvalues:
                    sex = SDMX_CODE['sex-U']  # Also applies to "Male/Female"
                    if keyvalues['Gender'].text.strip() == 'Male':
                        sex = SDMX_CODE['sex-M']
                    elif keyvalues['Gender'].text.strip() == 'Female':
                        sex = SDMX_CODE['sex-F']
                    self.graph.add((resource, SDMX_DIMENSION['sex'], sex))
                    del keyvalues['Gender']
                # Do we know the status ?
                if 'Status' in keyvalues:
                    # Add the status
                    status = keyvalues['Status'].text.strip()
                    self.graph.add((resource, HISCO['status'],
                                    self._get_status_uri(status)))
                    # Parse the status page if necessary
                    status_page = keyvalues['Status'].find_all('a')[0]['href']
                    if status_page not in parsed_status_page:
                        self._parse_status_page(status_page)
                        parsed_status_page.add(status_page)
                    del keyvalues['Status']
                # TODO Relation
                # TODO Product
                # TODO Provenance
                # Do we have a translation in English ?
                if 'Translation' in keyvalues:
                    trans = Literal(
                        keyvalues['Translation'].text.strip().replace(
                            '´', "'"), lang='en')
                    self.graph.add((resource, SKOS.altLabel, trans))
                    del keyvalues['Translation']
                # Print whatever is left
                #if len(keyvalues.keys()) != 0:
                #    log.info(keyvalues.keys())
            # Look for the "next" link
            next_table = doc.find('table', class_='nextprev')
            next_page = None
            for link in next_table.find_all('a'):
                if 'Next' in link.text:
                    next_page = link['href']

    def _parse_status_page(self, url):
        ''' Parses a status page such as
        http://historyofwork.iisg.nl/status.php?int02=32 '''
        # Work-around broken content
        if url == 'status.php?int02=15':
            return
        # Load the page
        doc = self.get_page(ROOT + url)
        # Find the data about this status
        status_uri = None
        for line in doc.find('pre').text.split('\n'):
            # "<code> <label>" line: create/describe the status resource
            if re.match("^[0-9]* [a-zA-Z]*", line):
                m = re.search("^([0-9]*) ([a-zA-Z]*)", line)
                status_uri = self._get_status_uri(m.group(1))
                self.graph.add((status_uri, RDF.type, HISCO['Status']))
                self.graph.add((status_uri, RDFS.label, Literal(m.group(2))))
                self.graph.add(
                    (status_uri, SKOS.prefLabel, Literal(m.group(2))))
                self.graph.add(
                    (status_uri, SKOS.notation, Literal(m.group(1))))
            # "XX:\t<label>" line: translated label for the current status
            if re.match("^[A-Z]{2}:\t[a-zA-Z]*", line):
                m = re.search("^([A-Z]{2}):\t([a-zA-Z]*)", line)
                lang_code = m.group(1).lower()
                label = Literal(m.group(2), lang=lang_code)
                self.graph.add((status_uri, SKOS.altLabel, label))
        # Describe the class
        status_class = HISCO['Status']
        descr = doc.find('table', attrs={
            'width': '600'
        }).text.strip().split('\r\n')
        self.graph.add((status_class, RDF.type, RDFS.Class))
        self.graph.add((status_class, RDFS.label, Literal("Status code")))
        self.graph.add((status_class, DCTERMS.comment, Literal(descr[1])))
        # Describe the property
        status_property = HISCO['status']
        self.graph.add((status_property, RDF.type, RDF.Property))
        self.graph.add((status_property, RDFS.label,
                        Literal("status associated to the occupation")))
        self.graph.add((status_property, RDFS.range, HISCO['Status']))
        self.graph.add((status_property, RDFS.domain, SKOS.Concept))

    def _parse_records_table(self, url, size):
        ''' Minor, Rubri and Micro have the same structure except an
        additional column for Micro with links to the titles.

        *size* is the number of leading digits a cell needs to start a
        new record (2, 3 or 5). '''
        # Load the page
        doc = self.get_page(ROOT + url)
        # Find the right table
        table = doc.find('table', attrs={
            'cellspacing': '8',
            'cellpadding': '0'
        })
        # If we can't find the table return an empty list
        # work around for http://historyofwork.iisg.nl/list_micro.php?keywords=920&keywords_qt=lstrict
        if table == None:
            return []
        # Look for the groups
        groups = []
        group = None
        columns = table.find_all('td')
        for index in range(0, len(columns)):
            # New group
            if re.match("[0-9]{%d}" % size, columns[index].text):
                if group != None:
                    groups.append(group)
                group = {}
                group['code'] = columns[index].text
                group['title'] = columns[index + 1].text
                link = columns[index + 1].find_all('a')[0]['href']
                group.setdefault('links', [])
                group['links'].append(link)
                group['description'] = columns[index + 2].text
                if columns[index + 3].text == "Display Titles":
                    link = columns[index + 3].find_all('a')[0]['href']
                    group['titles_link'] = link
        # NOTE(review): if no cell matched, this appends None to the
        # result — callers appear to assume at least one match. Confirm.
        groups.append(group)
        return groups

    def _get_group_uri(self, code):
        # URI for a major/minor/rubri group
        return HISCO['group-%s' % code]

    def _get_hisco_uri(self, code):
        # URI for a micro group (full HISCO code)
        return HISCO['hisco-%s' % code]

    def _get_occupation_title_uri(self, code):
        # URI for an occupational-title record
        return HISCO['occupation-%s' % code]

    def _get_status_uri(self, code):
        # URI for a status code
        return HISCO['status-%s' % code]
def encode_container(self, bundle, container=None, identifier=None):
    """Encode a PROV bundle's records as RDF triples.

    :param bundle: PROV bundle whose ``_records`` are serialized.
    :param container: optional graph to add triples to; a new
        ConjunctiveGraph (named *identifier*) is created when None.
    :param identifier: graph identifier for a newly created container.
    :return: the graph containing the encoded records.

    NOTE(review): *identifier* is reused inside the record loop as the
    per-record subject (URIRef or BNode); its incoming value only matters
    when *container* is None.
    """
    if container is None:
        container = ConjunctiveGraph(identifier=identifier)
        nm = container.namespace_manager
        nm.bind('prov', PROV.uri)
    # Re-declare the bundle's namespaces on the output graph.
    for namespace in bundle.namespaces:
        container.bind(namespace.prefix, namespace.uri)
    # Records without an explicit identifier get a generated anonymous one.
    id_generator = AnonymousIDGenerator()
    real_or_anon_id = lambda record: record._identifier.uri if \
        record._identifier else id_generator.get_anon_id(record)
    for record in bundle._records:
        rec_type = record.get_type()
        if hasattr(record, 'identifier') and record.identifier:
            identifier = URIRef(text_type(real_or_anon_id(record)))
            container.add((identifier, RDF.type, URIRef(rec_type.uri)))
        else:
            identifier = None
        if record.attributes:
            bnode = None
            formal_objects = []
            used_objects = []
            all_attributes = list(record.formal_attributes) + list(record.attributes)
            # A relation needs a qualified form when it is named, or when
            # any formal attribute beyond the first two carries a value.
            formal_qualifiers = False
            for attrid, (attr, value) in enumerate(list(record.formal_attributes)):
                if (identifier is not None and value is not None) or \
                        (identifier is None and value is not None and attrid > 1):
                    formal_qualifiers = True
            has_qualifiers = len(record.extra_attributes) > 0 or formal_qualifiers
            for idx, (attr, value) in enumerate(all_attributes):
                if record.is_relation():
                    # Predicate for the direct (unqualified) relation triple.
                    pred = URIRef(PROV[PROV_N_MAP[rec_type]].uri)
                    # create bnode relation
                    if bnode is None:
                        # First pass for this record: emit the direct triple
                        # and, when needed, the qualified-relation node.
                        valid_formal_indices = set()
                        for idx, (key, val) in enumerate(record.formal_attributes):
                            formal_objects.append(key)
                            if val:
                                valid_formal_indices.add(idx)
                        used_objects = [record.formal_attributes[0][0]]
                        subj = None
                        if record.formal_attributes[0][1]:
                            subj = URIRef(record.formal_attributes[0][1].uri)
                        if identifier is None and subj is not None:
                            try:
                                obj_val = record.formal_attributes[1][1]
                                # NOTE(review): obj_attr is computed but never
                                # used afterwards.
                                obj_attr = URIRef(record.formal_attributes[1][0].uri)
                            except IndexError:
                                obj_val = None
                            if obj_val and (rec_type not in [PROV_END,
                                                             PROV_START,
                                                             PROV_USAGE,
                                                             PROV_GENERATION,
                                                             PROV_DERIVATION,
                                                             PROV_INVALIDATION] or
                                            (valid_formal_indices == {0, 1} and
                                             len(record.extra_attributes) == 0)):
                                used_objects.append(record.formal_attributes[1][0])
                                obj_val = self.encode_rdf_representation(obj_val)
                                # alternateOf is emitted in the reverse direction.
                                if rec_type == PROV_ALTERNATE:
                                    subj, obj_val = obj_val, subj
                                container.add((subj, pred, obj_val))
                                if rec_type == PROV_MENTION:
                                    if record.formal_attributes[2][1]:
                                        used_objects.append(record.formal_attributes[2][0])
                                        obj_val = self.encode_rdf_representation(record.formal_attributes[2][1])
                                        container.add((subj, URIRef(PROV['asInBundle'].uri), obj_val))
                                # Direct triple emitted; no qualified node needed.
                                has_qualifiers = False
                        if rec_type in [PROV_ALTERNATE]:  #, PROV_ASSOCIATION]:
                            continue
                        if subj and (has_qualifiers or identifier):  #and (len(record.extra_attributes) > 0 or identifier):
                            qualifier = rec_type._localpart
                            rec_uri = rec_type.uri
                            # A prov:type of Revision/Quotation/PrimarySource
                            # refines the qualified-relation class.
                            for attr_name, val in record.extra_attributes:
                                if attr_name == PROV['type']:
                                    if PROV['Revision'] == val or \
                                            PROV['Quotation'] == val or \
                                            PROV['PrimarySource'] == val:
                                        qualifier = val._localpart
                                        rec_uri = val.uri
                                        if identifier is not None:
                                            container.remove((identifier, RDF.type, URIRef(rec_type.uri)))
                            QRole = URIRef(PROV['qualified' + qualifier].uri)
                            if identifier is not None:
                                container.add((subj, QRole, identifier))
                            else:
                                bnode = identifier = BNode()
                                container.add((subj, QRole, identifier))
                                container.add((identifier, RDF.type,
                                               URIRef(rec_uri)))
                                # reset identifier to BNode
                    if value is not None and attr not in used_objects:
                        # Attach the remaining attribute to the qualified node,
                        # mapping the PROV-N attribute to its PROV-O predicate.
                        if attr in formal_objects:
                            pred = attr2rdf(attr)
                        elif attr == PROV['role']:
                            pred = URIRef(PROV['hadRole'].uri)
                        elif attr == PROV['plan']:
                            pred = URIRef(PROV['hadPlan'].uri)
                        elif attr == PROV['type']:
                            pred = RDF.type
                        elif attr == PROV['label']:
                            pred = RDFS.label
                        elif isinstance(attr, QualifiedName):
                            pred = URIRef(attr.uri)
                        else:
                            pred = self.encode_rdf_representation(attr)
                        # Record-type specific predicate fix-ups.
                        if PROV['plan'].uri in pred:
                            pred = URIRef(PROV['hadPlan'].uri)
                        if PROV['informant'].uri in pred:
                            pred = URIRef(PROV['activity'].uri)
                        if PROV['responsible'].uri in pred:
                            pred = URIRef(PROV['agent'].uri)
                        if rec_type == PROV_DELEGATION and \
                                PROV['activity'].uri in pred:
                            pred = URIRef(PROV['hadActivity'].uri)
                        if (rec_type in [PROV_END, PROV_START] and
                                PROV['trigger'].uri in pred) or\
                                (rec_type in [PROV_USAGE] and PROV['used'].uri in pred):
                            pred = URIRef(PROV['entity'].uri)
                        if rec_type in [PROV_GENERATION, PROV_END, PROV_START,
                                        PROV_USAGE, PROV_INVALIDATION]:
                            if PROV['time'].uri in pred:
                                pred = URIRef(PROV['atTime'].uri)
                            if PROV['ender'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['starter'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['location'].uri in pred:
                                pred = URIRef(PROV['atLocation'].uri)
                        if rec_type in [PROV_ACTIVITY]:
                            if PROV_ATTR_STARTTIME in pred:
                                pred = URIRef(PROV['startedAtTime'].uri)
                            if PROV_ATTR_ENDTIME in pred:
                                pred = URIRef(PROV['endedAtTime'].uri)
                        if rec_type == PROV_DERIVATION:
                            if PROV['activity'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['generation'].uri in pred:
                                pred = URIRef(PROV['hadGeneration'].uri)
                            if PROV['usage'].uri in pred:
                                pred = URIRef(PROV['hadUsage'].uri)
                            if PROV['usedEntity'].uri in pred:
                                pred = URIRef(PROV['entity'].uri)
                        container.add((identifier, pred,
                                       self.encode_rdf_representation(value)))
                    continue
                # Non-relation record (element): plain attribute triples.
                if value is None:
                    continue
                if isinstance(value, ProvRecord):
                    obj = URIRef(text_type(real_or_anon_id(value)))
                else:
                    # Assuming this is a datetime value
                    obj = self.encode_rdf_representation(value)
                if attr == PROV['location']:
                    pred = URIRef(PROV['atLocation'].uri)
                    # NOTE(review): the first branch is dead code ('if False').
                    if False and isinstance(value, (URIRef, QualifiedName)):
                        if isinstance(value, QualifiedName):
                            value = URIRef(value.uri)
                        container.add((identifier, pred, value))
                    else:
                        container.add((identifier, pred,
                                       self.encode_rdf_representation(obj)))
                    continue
                if attr == PROV['type']:
                    pred = RDF.type
                elif attr == PROV['label']:
                    pred = RDFS.label
                elif attr == PROV_ATTR_STARTTIME:
                    pred = URIRef(PROV['startedAtTime'].uri)
                elif attr == PROV_ATTR_ENDTIME:
                    pred = URIRef(PROV['endedAtTime'].uri)
                else:
                    pred = self.encode_rdf_representation(attr)
                container.add((identifier, pred, obj))
    return container
class TestKyotoCabinetConjunctiveGraphCore(unittest.TestCase):
    """Core tests for the KyotoCabinet-backed ConjunctiveGraph store."""

    def setUp(self):
        # Fresh store per test, opened at the configured path.
        store = "KyotoCabinet"
        self.graph = ConjunctiveGraph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def tearDown(self):
        # Destroy the store, then clean whatever is left on disk.
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except:
            # NOTE(review): bare except silently ignores close() failures.
            pass
        if getattr(self, "path", False) and self.path is not None:
            if os.path.exists(self.path):
                if os.path.isdir(self.path):
                    for f in os.listdir(self.path):
                        os.unlink(self.path + "/" + f)
                    os.rmdir(self.path)
                elif len(self.path.split(":")) == 1:
                    # Plain file path (no ":"-separated component).
                    os.unlink(self.path)
                else:
                    os.remove(self.path)

    def test_namespaces(self):
        # NOTE(review): the "dc" URI below has a doubled scheme
        # ("http://http://...") — looks like a typo; the assertions only
        # check the namespace count and the foaf entry, so it passes.
        self.graph.bind("dc", "http://http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        # 5 = the two bindings above plus the store's default bindings.
        self.assert_(len(list(self.graph.namespaces())) == 5)
        self.assert_(("foaf", rdflib.term.URIRef(u"http://xmlns.com/foaf/0.1/")) in
                     list(self.graph.namespaces()))

    def test_play_journal(self):
        # play_journal is not implemented by this store.
        self.assertRaises(NotImplementedError, self.graph.store.play_journal,
                          {"graph": self.graph})

    def test_readable_index(self):
        # Smoke test: only checks that readable_index() does not raise.
        print(readable_index(111))

    def test_triples_context_reset(self):
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        # Iterating with an explicit context must still see both triples.
        ntriples = self.graph.triples((None, None, None),
                                      context=self.graph.store)
        self.assert_(len(list(ntriples)) == 2)

    def test_remove_context_reset(self):
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        # Remove one triple directly through the store.
        self.graph.store.remove((michel, likes, cheese), self.graph.store)
        self.graph.commit()
        self.assert_(len(list(self.graph.triples(
            (None, None, None), context=self.graph.store))) == 1)

    def test_remove_db_exception(self):
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        # Calling __len__ on the store must not disturb later iteration.
        self.graph.store.__len__(context=self.graph.store)
        self.assert_(len(list(self.graph.triples(
            (None, None, None), context=self.graph.store))) == 2)
class RDFAggregator(Aggregator):
    """Aggregates the RDF triples of serialized objects into one
    ConjunctiveGraph and serializes the union (with VoID paging data)."""

    def __init__(self, *args, **kw):
        """Initialize the RDF aggregator and bind the common prefixes."""
        super(RDFAggregator, self).__init__('csv', *args, **kw)
        self.aggregator = ConjunctiveGraph()
        self.aggregator.bind(u'owl', OWL)
        self.aggregator.bind(u'lic', LIC)
        self.aggregator.bind(u'siorg', SIORG)
        self.aggregator.bind(u'siafi', SIAFI)
        self.aggregator.bind(u'geo', GEO)
        self.aggregator.bind(u'dbpedia', DBPEDIA)
        self.aggregator.bind(u'dbprop', DBPROP)
        self.aggregator.bind(u'dbo', DBONT)
        self.aggregator.bind(u'void', VOID)
        self.aggregator.bind(u'foaf', FOAF)
        self.aggregator.bind(u'vcard', VCARD)

    def add(self, obj):
        """Add the object's triples to the aggregating graph.

        Objects exposing a ``repr_rdf()`` method supply their own triples;
        otherwise triples are derived heuristically from the object's
        attributes (class URI, document URI, label, geo point and the
        properties listed under ``self.atributo_serializar``).
        """
        if getattr(obj, 'repr_rdf', None):
            # The object has a method providing its own RDF representation.
            triplas = obj.repr_rdf()
            for t in triplas:
                self.aggregator.add(t)
        else:
            # No such method: try to build triples by heuristics.
            subject = obj.uri
            doc = obj.doc_uri
            if doc == subject:
                doc = None
            class_uri = getattr(obj.__class__, '__class_uri__', None)
            expostos = getattr(obj.__class__, self.atributo_serializar, set())
            prop_map = getattr(obj.__class__, '__rdf_prop__', {})
            g = self.aggregator
            # class (rdf:type)
            if class_uri:
                g.add((URIRef(subject), RDF['type'], URIRef(class_uri)))
            # document describing the subject
            if doc:
                g.add((URIRef(doc), RDF['type'], FOAF['Document']))
                g.add((URIRef(subject), FOAF['isPrimaryTopicOf'], URIRef(doc)))
                g.add((URIRef(doc), FOAF['primaryTopic'], URIRef(subject)))
            # name/label (unless a custom mapping overrides 'nome')
            if getattr(obj, 'nome', None):
                if getattr(obj, '__rdf_prop__', None) is None or \
                        obj.__rdf_prop__.get('nome', None) is None:
                    g.add((URIRef(subject), RDFS['label'], Literal(obj.nome)))
            # geo location
            if getattr(obj, 'geo_ponto', None):
                ponto = obj.geo_ponto
                if ponto:
                    g.add((URIRef(subject), GEO['lat'], Literal(ponto['lat'])))
                    g.add((URIRef(subject), GEO['long'], Literal(ponto['lon'])))
            # exposed properties
            for atr in expostos:
                if atr in prop_map.keys():
                    if getattr(prop_map[atr], '__call__', None):
                        # the property's triples are given by a function
                        triplas = prop_map[atr](obj)
                        if triplas:
                            for t in triplas:
                                g.add(t)
                    elif prop_map[atr].get('metodo', None):
                        # the property's triples are given by a method
                        m = getattr(obj, prop_map[atr]['metodo'])
                        triplas = m(atr)
                        if triplas:
                            for t in triplas:
                                g.add(t)
                    elif prop_map[atr].get('pred_uri', None):
                        # the property corresponds to a single triple
                        pred_uri = prop_map[atr]['pred_uri']
                        # NOTE(review): 'object' shadows the builtin.
                        object = getattr(obj, atr, None)
                        if object:
                            obj_uri = getattr(object, 'uri', lambda: None)()
                            obj_cls_uri = getattr(object, '__class_uri__', None)
                            # does the target object define a URI?
                            if obj_uri:
                                g.add((URIRef(subject), URIRef(pred_uri),
                                       URIRef(obj_uri)))
                            elif obj_cls_uri:
                                # no URI but the class has one:
                                # try a blank node
                                bn = BNode()
                                g.add((URIRef(subject), URIRef(pred_uri), bn))
                                g.add((bn, RDF['type'], URIRef(obj_cls_uri)))
                                g.add((bn, RDFS['comment'],
                                       Literal(unicode(obj))))
                            else:
                                # otherwise treat the property as a literal
                                g.add((URIRef(subject), URIRef(pred_uri),
                                       Literal(unicode(object))))

    def serialize(self, format="n3"):
        """Return the serialization of the aggregated RDF graph (union of
        the graphs), adding the VoID dataset/paging triples first.

        :param format: "xml", "rdf", "rdf/xml", "ttl", "n3" or "nt";
            anything else falls back to "n3".
        """
        format_map = {
            'xml': 'xml',
            'rdf': 'pretty-xml',
            'rdf/xml': 'pretty-xml',
            'ttl': 'n3',
            'n3': 'n3',
            'nt': 'nt',
        }
        f = format_map.get(format, 'n3')
        current_url = self.dataset_split.get('current_url', '')  # url of the current document
        dataset_url = self.dataset_split.get('dataset_url', '')  # general url of the dataset
        next_url = self.dataset_split.get('next_url', '')  # url of the next page
        # the dataset uri: the document url plus #dataset
        if current_url:
            self.aggregator.add((URIRef(current_url + "#dataset"), RDF['type'], VOID['Dataset']))
            self.aggregator.add((URIRef(current_url), RDF['type'], VOID['DatasetDescription']))
            self.aggregator.add((URIRef(current_url), FOAF['primaryTopic'], URIRef(current_url + "#dataset")))
            if next_url:
                self.aggregator.add((URIRef(current_url + "#dataset"), RDFS['seeAlso'], URIRef(next_url + "#dataset")))
        if next_url:
            self.aggregator.add((URIRef(next_url + "#dataset"), RDF['type'], VOID['Dataset']))
            self.aggregator.add((URIRef(next_url), RDF['type'], VOID['DatasetDescription']))
            self.aggregator.add((URIRef(next_url), FOAF['primaryTopic'], URIRef(next_url + "#dataset")))
        if dataset_url:
            self.aggregator.add((URIRef(dataset_url + "#dataset"), RDF['type'], VOID['Dataset']))
            self.aggregator.add((URIRef(dataset_url), RDF['type'], VOID['DatasetDescription']))
            self.aggregator.add((URIRef(dataset_url), FOAF['primaryTopic'], URIRef(dataset_url + "#dataset")))
            # the current and next pages are subsets of the whole dataset
            if current_url:
                self.aggregator.add((URIRef(dataset_url + "#dataset"), VOID['subset'], URIRef(current_url + "#dataset")))
            if next_url:
                self.aggregator.add((URIRef(dataset_url + "#dataset"), VOID['subset'], URIRef(next_url + "#dataset")))
        return self.aggregator.serialize(format=f)
from rdflib.graph import ConjunctiveGraph from rdflib.namespace import Namespace, RDF from rdflib.term import BNode, Literal DC = Namespace(u"http://purl.org/dc/elements/1.1/") FUNC = Namespace(u"http://example.org/functions#") _XSD_NS = Namespace('http://www.w3.org/2001/XMLSchema#') graph = ConjunctiveGraph() graph.add((BNode(), RDF.value, Literal(0))) graph.add((BNode(), RDF.value, Literal(1))) graph.add((BNode(), RDF.value, Literal(2))) graph.add((BNode(), RDF.value, Literal(3))) from rdflib.term import _toPythonMapping NUMERIC_TYPES = [type_uri for type_uri in _toPythonMapping if \ _toPythonMapping[type_uri] in (int, float, long)] def func_even(a): # Should this be required, or be done automatically? from rdflib.sparql.sparqlOperators import getValue value = getValue(a) if isinstance(value, Literal) and value.datatype in NUMERIC_TYPES: return Literal(int(value.toPython() % 2 == 0), datatype=_XSD_NS.boolean) else: raise TypeError(a) def test_even_extension(): res = list(graph.query("""
class ContextTest(TestCase):
    """Exercise named-context (quad) behaviour of the "Django" rdflib store.

    Heavily based on
    https://github.com/RDFLib/rdflib-postgresql/blob/master/test/context_case.py

    NOTE(review): uses the deprecated unittest aliases ``assertEquals`` /
    ``assert_``; modern unittest spells them ``assertEqual`` / ``assertTrue``.
    """  # noqa: E501

    # Store/plugin configuration for rdflib's store test harness.
    store_name = "Django"
    storetest = True
    path = ""
    create = True

    # Fixture terms shared by all tests.
    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')
    # Identifiers for the two named contexts used throughout.
    c1 = URIRef(u'context-1')
    c2 = URIRef(u'context-2')

    def setUp(self):
        """Open a fresh (destroyed-then-created) store-backed graph."""
        self.graph = ConjunctiveGraph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        """Destroy the backing store and close the graph."""
        self.graph.destroy(self.path)
        self.graph.close()

    def get_context(self, identifier):
        """Return a Graph view over the shared store for `identifier`.

        NOTE(review): passes the TestCase itself as ``namespace_manager`` —
        looks suspicious; confirm this is intended.
        """
        assert isinstance(identifier, URIRef) or isinstance(identifier, BNode), type(identifier)  # noqa: E501
        return Graph(store=self.graph.store, identifier=identifier, namespace_manager=self)  # noqa: E501

    def addStuff(self):
        """Add the seven fixture triples to context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.add((tarek, likes, pizza))
        graph.add((tarek, likes, cheese))
        graph.add((michel, likes, pizza))
        graph.add((michel, likes, cheese))
        graph.add((bob, likes, cheese))
        graph.add((bob, hates, pizza))
        graph.add((bob, hates, michel))

    def removeStuff(self):
        """Remove the same seven fixture triples from context c1."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        c1 = self.c1
        graph = Graph(self.graph.store, c1)
        graph.remove((tarek, likes, pizza))
        graph.remove((tarek, likes, cheese))
        graph.remove((michel, likes, pizza))
        graph.remove((michel, likes, cheese))
        graph.remove((bob, likes, cheese))
        graph.remove((bob, hates, pizza))
        graph.remove((bob, hates, michel))

    def addStuffInMultipleContexts(self):
        """Add one identical triple to the default context, c1 and c2."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)
        # add to default context
        self.graph.add(triple)
        # add to context 1
        graph = Graph(self.graph.store, c1)
        graph.add(triple)
        # add to context 2
        graph = Graph(self.graph.store, c2)
        graph.add(triple)

    def testConjunction(self):
        """Adding a second triple to c1 raises both c1's and the union's len to 2."""
        self.addStuffInMultipleContexts()
        triple = (self.pizza, self.likes, self.pizza)
        # add to context 1
        graph = Graph(self.graph.store, self.c1)
        graph.add(triple)
        self.assertEquals(len(graph), 2)
        self.assertEquals(len(self.graph), 2)

    def testAdd(self):
        """Fixture insertion itself must not raise."""
        self.addStuff()

    def testRemove(self):
        """Fixture insertion followed by removal must not raise."""
        self.addStuff()
        self.removeStuff()

    def testLenInOneContext(self):
        """len() of a single context tracks only that context's triples."""
        c1 = self.c1
        # make sure context is empty
        self.graph.remove_context(self.get_context(c1))
        graph = Graph(self.graph.store, c1)
        oldLen = len(self.graph)
        for _ in range(0, 10):
            graph.add((BNode(), self.hates, self.hates))
        self.assertEquals(len(graph), oldLen + 10)
        self.assertEquals(len(self.get_context(c1)), oldLen + 10)
        # Dropping the context removes its triples from the union as well.
        self.graph.remove_context(self.get_context(c1))
        self.assertEquals(len(self.graph), oldLen)
        self.assertEquals(len(graph), 0)

    def testLenInMultipleContexts(self):
        """The union graph de-duplicates a triple present in several contexts."""
        oldLen = len(self.graph)
        self.addStuffInMultipleContexts()
        # addStuffInMultipleContexts is adding the same triple to
        # three different contexts. So it's only + 1
        self.assertEquals(len(self.graph), oldLen + 1)
        graph = Graph(self.graph.store, self.c1)
        self.assertEquals(len(graph), oldLen + 1)

    def testRemoveInMultipleContexts(self):
        """Removing from individual contexts leaves the triple visible until a
        context-less remove drops every occurrence."""
        c1 = self.c1
        c2 = self.c2
        triple = (self.pizza, self.hates, self.tarek)
        self.addStuffInMultipleContexts()
        # triple should be still in store after removing it from c1 + c2
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c1)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        graph = Graph(self.graph.store, c2)
        graph.remove(triple)
        self.assertIn(triple, self.graph)
        self.graph.remove(triple)
        # now gone!
        self.assertNotIn(triple, self.graph)
        # add again and see if remove without context removes all triples!
        self.addStuffInMultipleContexts()
        self.graph.remove(triple)
        self.assertNotIn(triple, self.graph)

    def testContexts(self):
        """contexts() enumerates c1/c2, with and without a triple filter."""
        triple = (self.pizza, self.hates, self.tarek)
        self.addStuffInMultipleContexts()

        def cid(c):
            # contexts() may yield Graph objects or raw identifiers;
            # normalise to the identifier.
            if not isinstance(c, str):
                return c.identifier
            return c
        self.assertIn(self.c1, [cid(c) for c in self.graph.contexts()])
        self.assertIn(self.c2, [cid(c) for c in self.graph.contexts()])

        contextList = [cid(c) for c in self.graph.contexts(triple)]
        self.assertIn(self.c1, contextList)
        self.assertIn(self.c2, contextList)

    def testRemoveContext(self):
        """remove_context() empties c1 and drops it from contexts()."""
        c1 = self.c1
        self.addStuffInMultipleContexts()
        self.assertEquals(len(Graph(self.graph.store, c1)), 1)
        self.assertEquals(len(self.get_context(c1)), 1)
        self.graph.remove_context(self.get_context(c1))
        self.assert_(self.c1 not in self.graph.contexts())

    def testRemoveAny(self):
        """A fully wildcarded remove clears the whole store."""
        Any = None
        self.addStuffInMultipleContexts()
        self.graph.remove((Any, Any, Any))
        self.assertEquals(len(self.graph), 0)

    def testTriples(self):
        """Exhaustive triple-pattern matching, with and without context.

        Every pattern is checked twice — against the c1 context view and the
        union graph — and must yield identical counts because all fixture
        triples live in c1.
        """
        triples = self.graph.triples
        graph = self.graph
        c1graph = Graph(self.graph.store, self.c1)
        c1triples = c1graph.triples
        Any = None
        self.addStuff()
        # unbound subjects with context
        self.assertEquals(len(list(c1triples((Any, self.likes, self.pizza)))), 2)
        self.assertEquals(len(list(c1triples((Any, self.hates, self.pizza)))), 1)
        self.assertEquals(len(list(c1triples((Any, self.likes, self.cheese)))), 3)
        self.assertEquals(len(list(c1triples((Any, self.hates, self.cheese)))), 0)
        # unbound subjects without context, same results!
        self.assertEquals(len(list(triples((Any, self.likes, self.pizza)))), 2)
        self.assertEquals(len(list(triples((Any, self.hates, self.pizza)))), 1)
        self.assertEquals(len(list(triples((Any, self.likes, self.cheese)))), 3)
        self.assertEquals(len(list(triples((Any, self.hates, self.cheese)))), 0)
        # unbound objects with context
        self.assertEquals(len(list(c1triples((self.michel, self.likes, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.tarek, self.likes, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.bob, self.hates, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.bob, self.likes, Any)))), 1)
        # unbound objects without context, same results!
        self.assertEquals(len(list(triples((self.michel, self.likes, Any)))), 2)
        self.assertEquals(len(list(triples((self.tarek, self.likes, Any)))), 2)
        self.assertEquals(len(list(triples((self.bob, self.hates, Any)))), 2)
        self.assertEquals(len(list(triples((self.bob, self.likes, Any)))), 1)
        # unbound predicates with context
        self.assertEquals(len(list(c1triples((self.michel, Any, self.cheese)))), 1)
        self.assertEquals(len(list(c1triples((self.tarek, Any, self.cheese)))), 1)
        self.assertEquals(len(list(c1triples((self.bob, Any, self.pizza)))), 1)
        self.assertEquals(len(list(c1triples((self.bob, Any, self.michel)))), 1)
        # unbound predicates without context, same results!
        self.assertEquals(len(list(triples((self.michel, Any, self.cheese)))), 1)
        self.assertEquals(len(list(triples((self.tarek, Any, self.cheese)))), 1)
        self.assertEquals(len(list(triples((self.bob, Any, self.pizza)))), 1)
        self.assertEquals(len(list(triples((self.bob, Any, self.michel)))), 1)
        # unbound subject, objects with context
        self.assertEquals(len(list(c1triples((Any, self.hates, Any)))), 2)
        self.assertEquals(len(list(c1triples((Any, self.likes, Any)))), 5)
        # unbound subject, objects without context, same results!
        self.assertEquals(len(list(triples((Any, self.hates, Any)))), 2)
        self.assertEquals(len(list(triples((Any, self.likes, Any)))), 5)
        # unbound predicates, objects with context
        self.assertEquals(len(list(c1triples((self.michel, Any, Any)))), 2)
        self.assertEquals(len(list(c1triples((self.bob, Any, Any)))), 3)
        self.assertEquals(len(list(c1triples((self.tarek, Any, Any)))), 2)
        # unbound predicates, objects without context, same results!
        self.assertEquals(len(list(triples((self.michel, Any, Any)))), 2)
        self.assertEquals(len(list(triples((self.bob, Any, Any)))), 3)
        self.assertEquals(len(list(triples((self.tarek, Any, Any)))), 2)
        # unbound subjects, predicates with context
        self.assertEquals(len(list(c1triples((Any, Any, self.pizza)))), 3)
        self.assertEquals(len(list(c1triples((Any, Any, self.cheese)))), 3)
        self.assertEquals(len(list(c1triples((Any, Any, self.michel)))), 1)
        # unbound subjects, predicates without context, same results!
        self.assertEquals(len(list(triples((Any, Any, self.pizza)))), 3)
        self.assertEquals(len(list(triples((Any, Any, self.cheese)))), 3)
        self.assertEquals(len(list(triples((Any, Any, self.michel)))), 1)
        # all unbound with context
        self.assertEquals(len(list(c1triples((Any, Any, Any)))), 7)
        # all unbound without context, same result!
        self.assertEquals(len(list(triples((Any, Any, Any)))), 7)
        # The convenience accessors must agree between the union graph and
        # the c1 context view.
        for c in [graph, self.get_context(self.c1)]:
            # unbound subjects
            self.assertEquals(set(c.subjects(self.likes, self.pizza)), {self.michel, self.tarek})
            self.assertEquals(set(c.subjects(self.hates, self.pizza)), {self.bob})
            self.assertEquals(set(c.subjects(self.likes, self.cheese)), {self.tarek, self.bob, self.michel})
            self.assertEquals(set(c.subjects(self.hates, self.cheese)), set())
            # unbound objects
            self.assertEquals(set(c.objects(self.michel, self.likes)), {self.cheese, self.pizza})
            self.assertEquals(set(c.objects(self.tarek, self.likes)), {self.cheese, self.pizza})
            self.assertEquals(set(c.objects(self.bob, self.hates)), {self.michel, self.pizza})
            self.assertEquals(set(c.objects(self.bob, self.likes)), {self.cheese})
            # unbound predicates
            self.assertEquals(set(c.predicates(self.michel, self.cheese)), {self.likes})
            self.assertEquals(set(c.predicates(self.tarek, self.cheese)), {self.likes})
            self.assertEquals(set(c.predicates(self.bob, self.pizza)), {self.hates})
            self.assertEquals(set(c.predicates(self.bob, self.michel)), {self.hates})
            # pairwise accessors
            self.assertEquals(set(c.subject_objects(self.hates)),
                              {(self.bob, self.pizza), (self.bob, self.michel)})
            self.assertEquals(set(c.subject_objects(self.likes)),
                              {(self.tarek, self.cheese), (self.michel, self.cheese),
                               (self.michel, self.pizza), (self.bob, self.cheese),
                               (self.tarek, self.pizza)})
            self.assertEquals(set(c.predicate_objects(self.michel)),
                              {(self.likes, self.cheese), (self.likes, self.pizza)})
            self.assertEquals(set(c.predicate_objects(self.bob)),
                              {(self.likes, self.cheese), (self.hates, self.pizza),
                               (self.hates, self.michel)})
            self.assertEquals(set(c.predicate_objects(self.tarek)),
                              {(self.likes, self.cheese), (self.likes, self.pizza)})
            self.assertEquals(set(c.subject_predicates(self.pizza)),
                              {(self.bob, self.hates), (self.tarek, self.likes),
                               (self.michel, self.likes)})
            self.assertEquals(set(c.subject_predicates(self.cheese)),
                              {(self.bob, self.likes), (self.tarek, self.likes),
                               (self.michel, self.likes)})
            self.assertEquals(set(c.subject_predicates(self.michel)),
                              {(self.bob, self.hates)})
            # iterating the graph yields exactly the seven fixture triples
            self.assertEquals(set(c),
                              {(self.bob, self.hates, self.michel),
                               (self.bob, self.likes, self.cheese),
                               (self.tarek, self.likes, self.pizza),
                               (self.michel, self.likes, self.pizza),
                               (self.michel, self.likes, self.cheese),
                               (self.bob, self.hates, self.pizza),
                               (self.tarek, self.likes, self.cheese)})
        # remove stuff and make sure the graph is empty again
        self.removeStuff()
        self.assertEquals(len(list(c1triples((Any, Any, Any)))), 0)
        self.assertEquals(len(list(triples((Any, Any, Any)))), 0)
def createRDF(username, city, artists, locationInformation, events, genres):
    """Build an RDF graph describing a user's music taste and local events.

    Links the last.fm user to liked artists/genres (as DBpedia resources),
    attaches points of interest to the city, and describes each event
    (date, label, artist, venue, city, genres).  Events whose fields lack
    the expected attributes are labelled "Event is missing information".

    Returns the populated ConjunctiveGraph.
    """
    graph = ConjunctiveGraph()

    rdfs = Namespace('http://www.w3.org/2000/01/rdf-schema#')
    iwa = Namespace('http://iwa2012-18-project.appspot.com/')
    lfm = Namespace('http://iwa2012-18-project.appspot.com/lastfm/')
    ev = Namespace('http://iwa2012-18-project.appspot.com/event/')
    dbp = Namespace('http://dbpedia.org/resource/')

    # Shared terms, built once instead of per-triple.
    user_node = lfm[username]
    city_node = dbp[city.replace(" ", "_")]

    # DBPedia link to artists, genres and cities
    for artist_name in artists:
        artist_node = dbp[artist_name.replace(" ", "_")]
        graph.add((user_node, iwa['likesArtist'], artist_node))
        graph.add((artist_node, rdfs['label'], Literal(artist_name)))

    for poi in locationInformation:
        graph.add((city_node, iwa['poi'], Literal(poi)))

    # event tuples: (id, date, label, artist, venue, [genres])
    for event in events:
        event_node = ev[event[0]]
        try:
            when = event[1]
            graph.add((event_node, ev['onDate'],
                       Literal("%s-%s-%s" % (when.year, when.month, when.day),
                               datatype=XSD.date)))
            graph.add((event_node, rdfs['label'], Literal(event[2])))
            graph.add((event_node, ev['artist'], dbp[event[3].replace(" ", "_")]))
            graph.add((event_node, ev['venue'], Literal(event[4])))
            graph.add((event_node, ev['city'], city_node))
            for event_genre in event[5]:
                graph.add((event_node, ev['genre'], dbp[event_genre.replace(" ", "_")]))
        except AttributeError:
            # A field lacked the expected attribute (e.g. no date object);
            # fall back to a generic label for the event.
            graph.add((event_node, rdfs['label'], Literal("Event is missing information")))

    for genre in genres:
        genre_node = dbp[genre.replace(" ", "_")]
        graph.add((user_node, iwa['likesGenre'], genre_node))
        graph.add((genre_node, rdfs['label'], Literal(genre)))

    graph.add((city_node, rdfs['label'], Literal(city)))
    return graph