class ShExManifest:
    def __init__(self, file_loc: str, manifest_format: str = 'json-ld', shex_format=None) -> None:
        """ A ShEx Manifest traversal tool

        :param file_loc: Location of the manifest file
        :param manifest_format: Format of the manifest file (e.g. 'turtle', 'json-ld')
        :param shex_format: Format of the ShEx files in the manifest. If None, use what the
            manifest says, otherwise replace '.shex' with shex_format
        """
        self.g = ConjunctiveGraph()
        self.g.parse(file_loc, format=manifest_format)
        self.entries: Dict[str, List[ShExManifestEntry]] = {}
        self.schema_loader = SchemaLoader()
        self.data_redirector: Optional[URIRedirector] = None
        self.schema_redirector: Optional[URIRedirector] = None
        manifest = self.g.value(None, RDF.type, MF.Manifest, any=False)
        for e in Collection(self.g, self.g.value(manifest, MF.entries, any=False)):
            entry = ShExManifestEntry(e, self.g, self)
            self.entries.setdefault(str(entry), []).append(entry)

    def data_uri(self, uri: URIRef) -> Union[URIRef, str]:
        return self.data_redirector.uri_for(uri) if self.data_redirector else uri

    def schema_uri(self, uri: URIRef) -> Union[URIRef, str]:
        return self.schema_redirector.uri_for(uri) if self.schema_redirector else uri
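
# A minimal usage sketch for ShExManifest. Hedged: 'manifest.ttl' and the
# helper name are hypothetical; only the `entries` dict documented above is
# relied upon.
def print_manifest_entry_counts(path: str) -> None:
    m = ShExManifest(path, manifest_format='turtle')
    for name, entries in m.entries.items():
        # each manifest name may map to several ShExManifestEntry objects
        print(name, len(entries))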
def test_remove_definition(self):
    with open(filepath('test-patch-remove-definition.json')) as f:
        patch1 = f.read()
    with self.client as client:
        res = client.patch(
            '/d/',
            data=patch1,
            content_type='application/json',
            headers={'Authorization': 'Bearer '
                     + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})
        patch_url = urlparse(res.headers['Location']).path
        res = client.post(
            patch_url + 'merge',
            headers={'Authorization': 'Bearer '
                     + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})
        self.assertEqual(res.status_code, http.client.NO_CONTENT)

        removed_entities = database.get_removed_entity_keys()
        self.assertEqual(removed_entities, set(['p0trgkvwbjd']))

        res = client.get('/trgkvwbjd',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.GONE)
        res = client.get('/trgkvwbjd.json',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.GONE)
        res = client.get('/trgkvwbjd?version=0',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.NOT_FOUND)
        res = client.get('/trgkvwbjd.json?version=0',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.NOT_FOUND)
        res = client.get('/trgkvwbjd?version=1',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.OK)
        res = client.get('/trgkvwbjd.json?version=1',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.OK)

        res = client.get('/h')
        g = ConjunctiveGraph()
        g.parse(format='json-ld', data=res.get_data(as_text=True))
        invalidated = g.value(subject=PERIODO['p0h#change-2'],
                              predicate=PROV.invalidated, any=False)
        self.assertEqual(invalidated, PERIODO['p0trgkvwbjd'])
        generated = list(g.objects(subject=PERIODO['p0h#change-2'],
                                   predicate=PROV.generated))
        self.assertEqual(len(generated), 2)
        self.assertIn(PERIODO['p0d?version=2'], generated)
        self.assertIn(PERIODO['p0trgkv?version=2'], generated)
class TestLoaders(LoadDirBase):
    def setup(self):
        LoadDirBase.setup(self)
        self.graph = ConjunctiveGraph()

    def test_load_if_modified(self):
        fname = "file1.n3"
        s = "urn:s1"
        for o in ["urn:T1", "urn:T2"]:
            self.write_file(fname, "<%s> a <%s> ." % (s, o))
            load_if_modified(self.graph, self.fpath(fname), 'n3')
            time.sleep(1.0)
            assert_equals(self.graph.value(URIRef(s), RDF.type, any=False),
                          URIRef(o))

    def test_load_dir(self):
        fnames = ["file-%s.n3" % i for i in range(4)]
        t1 = "urn:T1"
        t2 = "urn:T2"
        for fname in fnames:
            self.write_file(fname, "<urn:%s> a <%s> ." % (fname, t1))
        load_dir(self.graph, self.temp_dir)
        time.sleep(1.0)
        file0 = fnames[0]
        self.write_file(file0, "<urn:%s> a <%s> ." % (file0, t2))
        load_dir(self.graph, self.temp_dir)
        assert_equals(
            self.graph.value(URIRef("urn:%s" % file0), RDF.type, any=False),
            URIRef(t2))

    def test_loader(self):
        fname = "file1.n3"
        s = "urn:s1"
        load = loader(self.graph, self.temp_dir + '/')
        for o in ["urn:T1", "urn:T2"]:
            self.write_file(fname, "<%s> a <%s> ." % (s, o))
            load(fname)
            time.sleep(1.0)
            assert_equals(self.graph.value(URIRef(s), RDF.type, any=False),
                          URIRef(o))
def blogs():
    g = ConjunctiveGraph("Sleepycat")
    g.open("store")
    for person, blog in g.subject_objects(predicate=w.Blog):
        name = g.value(subject=person, predicate=w.Name)
        for title, feed_url in discover_feeds(blog):
            if title:
                title = "%s (%s)" % (name, title)
            else:
                title = name
            logging.info("found %s <%s>" % (title, feed_url))
            yield title, feed_url
    g.close()
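
# Hedged usage note: blogs() is a generator, so the Sleepycat store it opens
# stays open until the generator is exhausted; g.close() only runs after the
# last (title, feed_url) pair has been yielded, as in this full iteration.
for title, feed_url in blogs():
    print(title, feed_url)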
def test_jsonld():
    # generate shared canvas json-ld
    tei_file = "../../data/tei/ox/ox-frankenstein_notebook_c1.xml"
    manifest_uri = 'http://example.com/frankenstein.json'
    m = Manifest(tei_file, manifest_uri)
    jsonld = m.jsonld()

    # parse the json-ld as rdf
    register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser')
    g = ConjunctiveGraph()
    g.parse(data=jsonld, format='json-ld')

    # sanity check the graph
    assert g.value(URIRef('http://example.com/frankenstein.json'), RDF.type) \
        == URIRef('http://www.shared-canvas.org/ns/Manifest')
def handle(self, **options):
    _logger.debug("linking places")
    for place in models.Place.objects.filter(dbpedia__isnull=True):
        if not place.city or not place.state:
            continue

        # formulate a dbpedia place uri
        path = urllib2.quote('%s,_%s' % (_clean(place.city),
                                         _clean(place.state)))
        url = URIRef('http://dbpedia.org/resource/%s' % path)

        # attempt to get a graph from it
        graph = ConjunctiveGraph()
        try:
            _logger.debug("looking up %s" % url)
            graph.load(url)
        except urllib2.HTTPError as e:
            _logger.error(e)

        # if we've got three or more assertions, extract some stuff from
        # the graph and save back some info to the db; would be nice
        # to have a triple store underneath where we could persist
        # all the facts, eh?
        if len(graph) >= 3:
            place.dbpedia = url
            place.latitude = graph.value(url, geo['lat'])
            place.longitude = graph.value(url, geo['long'])
            for object in graph.objects(URIRef(url), owl['sameAs']):
                if object.startswith('http://sws.geonames.org'):
                    place.geonames = object
            place.save()
            _logger.info("found dbpedia resource %s" % url)
        else:
            _logger.warn("couldn't find dbpedia resource for %s" % url)
        reset_queries()
def test_jsonld():
    # generate shared canvas json-ld
    tei_file = "sga/data/tei/ox/ox-frankenstein_notebook_c1.xml"
    manifest_uri = 'http://example.com/frankenstein.json'
    m = Manifest(tei_file, manifest_uri)
    jsonld = m.jsonld()
    with open('test.jsonld', 'w') as out:
        out.write(json.dumps(jsonld, indent=2))

    # find the manifest
    manifest = None
    for r in jsonld['@graph']:
        if '@type' in r and r['@type'] == 'sc:Manifest':
            manifest = r
    assert manifest

    # check for images
    assert 'images' in manifest

    # check for canvases
    assert 'canvases' in manifest

    # get the sequence
    assert 'sequences' in manifest
    seq = get(jsonld, manifest['sequences'][0])

    # first canvas
    assert 'first' in seq
    canvas = get(jsonld, seq['first'])
    assert canvas['label'] == '1r'

    # check the content annotations
    assert count_type(jsonld, 'sc:ContentAnnotation') == 90

    # css should be there
    assert count_type(jsonld, 'cnt:ContentAsText') == 61

    # parse the json-ld as rdf
    register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser')
    g = ConjunctiveGraph()
    jsonld_str = json.dumps(jsonld)
    g.parse(data=jsonld_str, format='json-ld')

    # quick sanity check the graph
    assert g.value(URIRef('http://example.com/frankenstein.json'), RDF.type) \
        == URIRef('http://www.shared-canvas.org/ns/Manifest')
    line_anns = list(g.triples((None, RDF.type, SGA.LineAnnotation)))
    assert len(line_anns) == 638
def test_read(self):
    self.login_new_user()
    response = self.post_nanopub(data=self.turtle,
                                 content_type="text/turtle",
                                 expected_headers=["Location"])
    nanopub_id = response.headers['Location'].split('/')[-1]
    content = self.client.get("/pub/" + nanopub_id,
                              headers={'Accept': 'application/json'},
                              follow_redirects=True)
    g = ConjunctiveGraph()
    self.assertEqual(content.mimetype, "application/json")
    g.parse(data=str(content.data, 'utf8'), format="json-ld")
    self.assertEqual(len(g), 15)
    self.assertEqual(g.value(URIRef('http://example.com/janedoe'), RDF.type),
                     URIRef('http://schema.org/Person'))
def test_read_bnode_graph(self):
    self.login_new_user()
    response = self.post_nanopub(data=PERSON_INSTANCE_TRIG,
                                 content_type="application/trig",
                                 expected_headers=["Location"])
    nanopub_id = response.headers['Location']
    content = self.client.get("/about?uri=" + nanopub_id,
                              headers={'Accept': 'application/json'},
                              follow_redirects=True)
    g = ConjunctiveGraph()
    self.assertEqual(content.mimetype, "application/json")
    g.parse(data=str(content.data, 'utf8'), format="json-ld")
    self.assertEqual(len(g), 15)
    self.assertEqual(g.value(URIRef('http://example.com/janedoe'), RDF.type),
                     URIRef('http://schema.org/Person'))
def test_read_custom_graph(self):
    self.login_new_user()
    nanopub = Nanopublication(
        identifier=URIRef("http://example.com/janedoe/info"))
    nanopub.assertion.parse(data=self.turtle, format="turtle")
    trig = ConjunctiveGraph(store=nanopub.store).serialize(format='trig')
    response = self.post_nanopub(data=trig,
                                 content_type="application/trig",
                                 expected_headers=["Location"])
    nanopub_id = response.headers['Location']
    self.assertEqual(nanopub_id, "http://example.com/janedoe/info")
    content = self.client.get("/about?uri=" + nanopub_id,
                              headers={'Accept': 'application/json'},
                              follow_redirects=True)
    g = ConjunctiveGraph()
    self.assertEqual(content.mimetype, "application/json")
    g.parse(data=str(content.data, 'utf8'), format="json-ld")
    self.assertEqual(len(g), 15)
    self.assertEqual(g.value(URIRef('http://example.com/janedoe'), RDF.type),
                     URIRef('http://schema.org/Person'))
def parse(request, graph):
    try:
        cg = ConjunctiveGraph().parse(data=graph, format='n3')
    except Exception:
        return not_turtle_response(graph)

    DRUG = Namespace('http://aers.data2semantics.org/resource/drug/')
    IND = Namespace('http://aers.data2semantics.org/resource/indication/')
    PO = Namespace('http://www.data2semantics.org/ontology/patient/')
    UMLS = Namespace('http://linkedlifedata.com/resource/umls/id/')

    cg.bind('drug', DRUG)
    cg.bind('po', PO)
    cg.bind('umls', UMLS)
    cg.bind('indication', IND)

    try:
        patient = cg.value(predicate=RDF.type, object=PO['Patient'], any=False)
    except Exception:
        # More than one patient
        return multiple_patients_response(cg.serialize(format='turtle'))

    # If the patient has neither fever nor neutropenia, return early
    if not (cg.value(predicate=PO['hasDiagnosis'], object=UMLS['C0027947'])
            and cg.value(predicate=PO['hasMeasurement'], object=UMLS['C0015967'])):
        return not_febrile_neutropenia_response(cg.serialize(format='turtle'))
    else:
        # We now know the patient has Febrile Neutropenia
        cg.add((patient, PO['hasDiagnosis'], UMLS['C0746883']))

    # Initialise the score to zero
    score = 0
    trace = ""
    if (cg.value(predicate=PO['burdernOfIllness'], object=PO['MildSymptoms'])
            or cg.value(predicate=PO['burdernOfIllness'], object=PO['NoSymptoms'])):
        # Burden of illness: no or mild symptoms
        trace = trace + "No or mild symptoms\n"
        score += 5
    if not cg.value(predicate=PO['hasDiagnosis'], object=UMLS['C0020649']):
        # No hypotension
        trace = trace + "No hypotension\n"
        score += 5
    if not cg.value(predicate=PO['hasDiagnosis'], object=UMLS['C0024117']):
        # No COPD
        trace = trace + "No COPD\n"
        score += 4
    if (cg.value(predicate=PO['hasDiagnosis'], object=UMLS['C0280100'])
            or not cg.value(predicate=PO['hadPreviousIndication'], object=UMLS['C0026946'])):
        # Adult: C0280099
        # Child: C0279068
        # Solid tumor or no previous fungal infection (Mycoses)
        trace = trace + "Solid tumor or no previous fungal infection\n"
        score += 4
    if not cg.value(predicate=PO['hasDiagnosis'], object=UMLS['C0011175']):
        # No dehydration
        trace = trace + "No dehydration\n"
        score += 3
    if cg.value(predicate=PO['burdernOfIllness'], object=PO['ModerateSymptoms']):
        # Burden of illness: moderate symptoms
        trace = trace + "Moderate symptoms\n"
        score += 3
    if cg.value(predicate=PO['hasStatus'], object=PO['outpatient']):
        # Patient status: outpatient
        trace = trace + "Outpatient\n"
        score += 3

    patient = cg.value(predicate=RDF.type, object=PO['Patient'])
    age = cg.value(subject=patient, predicate=PO['hasAge'])
    if age.toPython() < 20:
        # Age is under 20
        trace = trace + "Age is under 20\n"
        score += 2
    trace = trace + "Age: {} \n".format(age)
    trace = trace + "Score: {}".format(score)

    cg.add((patient, RDFS.comment, Literal(trace, datatype=XSD['string'])))
    # A MASCC index of 21 or higher indicates low risk
    if score >= 21:
        cg.add((patient, PO['complicationRisk'], PO['lowRisk']))
    else:
        cg.add((patient, PO['complicationRisk'], PO['highRisk']))
    cg.add((patient, PO['masccIndex'], Literal(score, datatype=XSD['int'])))

    response = HttpResponse(cg.serialize(format='turtle'),
                            content_type='text/turtle')
    response['Content-Disposition'] = 'attachment; filename=patient.ttl'
    return response
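
# Hedged illustration of the kind of N3 input parse() expects. The patient URI
# and age are hypothetical; the UMLS codes match the checks above
# (C0027947 neutropenia, C0015967 fever).
EXAMPLE_PATIENT_N3 = """
@prefix po: <http://www.data2semantics.org/ontology/patient/> .
@prefix umls: <http://linkedlifedata.com/resource/umls/id/> .

<http://example.org/patient/1> a po:Patient ;
    po:hasDiagnosis umls:C0027947 ;
    po:hasMeasurement umls:C0015967 ;
    po:hasStatus po:outpatient ;
    po:hasAge 45 .
"""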
def test_get_history(self):
    with open(filepath('test-patch-adds-items.json')) as f:
        patch = f.read()
    with self.client as client:
        res1 = client.patch(
            '/d/',
            data=patch,
            content_type='application/json',
            headers={'Authorization': 'Bearer '
                     + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})
        patch_url = urlparse(res1.headers['Location']).path
        client.post(
            patch_url + 'messages',
            data='{"message": "Here is my patch"}',
            content_type='application/json',
            headers={'Authorization': 'Bearer '
                     + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})
        client.post(
            patch_url + 'messages',
            data='{"message": "Looks good to me"}',
            content_type='application/json',
            headers={'Authorization': 'Bearer '
                     + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})
        client.post(
            patch_url + 'merge',
            buffered=True,
            headers={'Authorization': 'Bearer '
                     + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})
        res3 = client.get('/h', headers={'Accept': 'application/ld+json'})
        self.assertEqual(res3.status_code, http.client.SEE_OTHER)
        self.assertEqual(urlparse(res3.headers['Location']).path, '/h.jsonld')
        res4 = client.get('/history.jsonld?inline-context')
        self.assertEqual(res4.status_code, http.client.OK)
        self.assertEqual(res4.headers['Content-Type'], 'application/ld+json')
        jsonld = res4.get_data(as_text=True)
        g = ConjunctiveGraph()
        g.parse(format='json-ld', data=jsonld)

        # Initial data load
        self.assertIn(  # None means any
            (HOST['h#change-1'], PROV.endedAtTime, None), g)
        self.assertIn(
            (HOST['h#change-1'], PROV.used, HOST['d?version=0']), g)
        self.assertIn(
            (HOST['d?version=0'], PROV.specializationOf, HOST['d']), g)
        self.assertIn(
            (HOST['h#change-1'], RDFS.seeAlso, HOST['h#patch-request-1']), g)
        self.assertIn(
            (HOST['h#patch-request-1'], FOAF.page, HOST['patches/1/']), g)
        self.assertNotIn(
            (HOST['h#patch-request-1'], AS.replies,
             HOST['h#patch-request-1-comments']), g)
        self.assertIn(
            (HOST['h#change-1'], PROV.used, HOST['h#patch-1']), g)
        self.assertIn(
            (HOST['h#patch-1'], FOAF.page,
             HOST['patches/1/patch.jsonpatch']), g)
        self.assertIn(
            (HOST['h#change-1'], PROV.generated, HOST['d?version=1']), g)
        self.assertIn(
            (HOST['d?version=1'], PROV.specializationOf, HOST['d']), g)

        # Change from first submitted patch
        self.assertIn(  # None means any
            (HOST['h#change-2'], PROV.startedAtTime, None), g)
        self.assertIn(  # None means any
            (HOST['h#change-2'], PROV.endedAtTime, None), g)
        start = g.value(subject=HOST['h#change-2'],
                        predicate=PROV.startedAtTime)
        self.assertEqual(start.datatype, XSD.dateTime)
        self.assertRegex(start.value.isoformat(), W3CDTF)
        end = g.value(subject=HOST['h#change-2'],
                      predicate=PROV.endedAtTime)
        self.assertEqual(end.datatype, XSD.dateTime)
        self.assertRegex(end.value.isoformat(), W3CDTF)
        self.assertIn(
            (HOST['h#change-2'], PROV.wasAssociatedWith,
             URIRef('https://orcid.org/1234-5678-9101-112X')), g)
        self.assertIn(
            (HOST['h#change-2'], PROV.wasAssociatedWith,
             URIRef('https://orcid.org/1211-1098-7654-321X')), g)
        for association in g.subjects(
                predicate=PROV.agent,
                object=URIRef('https://orcid.org/1234-5678-9101-112X')):
            role = g.value(subject=association, predicate=PROV.hadRole)
            self.assertIn(role, (HOST['v#submitted'], HOST['v#updated']))
        merger = g.value(
            predicate=PROV.agent,
            object=URIRef('https://orcid.org/1211-1098-7654-321X'))
        self.assertIn(
            (HOST['h#change-2'], PROV.qualifiedAssociation, merger), g)
        self.assertIn(
            (merger, PROV.hadRole, HOST['v#merged']), g)
        self.assertIn(
            (HOST['h#change-2'], PROV.used, HOST['d?version=1']), g)
        self.assertIn(
            (HOST['d?version=1'], PROV.specializationOf, HOST['d']), g)
        self.assertIn(
            (HOST['h#change-2'], RDFS.seeAlso, HOST['h#patch-request-2']), g)
        self.assertIn(
            (HOST['h#patch-request-2'], FOAF.page, HOST['patches/2/']), g)
        self.assertIn(
            (HOST['h#patch-request-2'], AS.replies,
             HOST['h#patch-request-2-comments']), g)
        commentCount = g.value(
            subject=HOST['h#patch-request-2-comments'],
            predicate=AS.totalItems)
        self.assertEqual(commentCount.value, 2)
        self.assertIn(
            (HOST['h#patch-request-2-comments'], AS.first,
             HOST['h#patch-request-2-comment-1']), g)
        self.assertIn(
            (HOST['h#patch-request-2-comments'], AS.last,
             HOST['h#patch-request-2-comment-2']), g)
        self.assertIn(
            (HOST['h#patch-request-2-comments'], AS.items,
             HOST['h#patch-request-2-comment-1']), g)
        self.assertIn(
            (HOST['h#patch-request-2-comments'], AS.items,
             HOST['h#patch-request-2-comment-2']), g)
        self.assertIn(
            (HOST['h#patch-request-2-comment-1'], RDF.type, AS.Note), g)
        self.assertIn(
            (HOST['h#patch-request-2-comment-1'], AS.attributedTo,
             URIRef('https://orcid.org/1234-5678-9101-112X')), g)
        self.assertIn(  # None means any
            (HOST['h#patch-request-2-comment-1'], AS.published, None), g)
        comment1_media_type = g.value(
            subject=HOST['h#patch-request-2-comment-1'],
            predicate=AS.mediaType)
        self.assertEqual(comment1_media_type.value, 'text/plain')
        comment1_content = g.value(
            subject=HOST['h#patch-request-2-comment-1'],
            predicate=AS.content)
        self.assertEqual(comment1_content.value, 'Here is my patch')
        self.assertIn(
            (HOST['h#patch-request-2-comment-2'], RDF.type, AS.Note), g)
        self.assertIn(
            (HOST['h#patch-request-2-comment-2'], AS.attributedTo,
             URIRef('https://orcid.org/1211-1098-7654-321X')), g)
        self.assertIn(  # None means any
            (HOST['h#patch-request-2-comment-2'], AS.published, None), g)
        comment2_media_type = g.value(
            subject=HOST['h#patch-request-2-comment-2'],
            predicate=AS.mediaType)
        self.assertEqual(comment2_media_type.value, 'text/plain')
        comment2_content = g.value(
            subject=HOST['h#patch-request-2-comment-2'],
            predicate=AS.content)
        self.assertEqual(comment2_content.value, 'Looks good to me')
        self.assertIn(
            (HOST['h#change-2'], PROV.used, HOST['h#patch-2']), g)
        self.assertIn(
            (HOST['h#patch-2'], FOAF.page,
             HOST['patches/2/patch.jsonpatch']), g)
        self.assertIn(
            (HOST['h#change-2'], PROV.generated, HOST['d?version=2']), g)
        self.assertIn(
            (HOST['d?version=2'], PROV.specializationOf, HOST['d']), g)
def test_get_history(self):
    with open(filepath('test-patch-adds-items.json')) as f:
        patch = f.read()
    with self.client as client:
        res1 = client.patch(
            '/d/',
            data=patch,
            content_type='application/json',
            headers={'Authorization': 'Bearer '
                     + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})
        patch_url = urlparse(res1.headers['Location']).path
        client.post(
            patch_url + 'merge',
            headers={'Authorization': 'Bearer '
                     + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})
        res2 = client.get('/h')
        self.assertEqual(res2.status_code, http.client.OK)
        self.assertEqual(res2.headers['Content-Type'], 'application/ld+json')
        jsonld = res2.get_data(as_text=True)
        g = ConjunctiveGraph()
        g.parse(format='json-ld', data=jsonld)

        # Initial data load
        self.assertIn(  # None means any
            (PERIODO['p0h#change-1'], PROV.endedAtTime, None), g)
        self.assertIn(
            (PERIODO['p0h#change-1'], PROV.used,
             PERIODO['p0d?version=0']), g)
        self.assertIn(
            (PERIODO['p0d?version=0'], PROV.specializationOf,
             PERIODO['p0d']), g)
        self.assertIn(
            (PERIODO['p0h#change-1'], PROV.used, PERIODO['p0h#patch-1']), g)
        self.assertIn(
            (PERIODO['p0h#patch-1'], FOAF.page,
             PERIODO['p0patches/1/patch.jsonpatch']), g)
        self.assertIn(
            (PERIODO['p0h#change-1'], PROV.generated,
             PERIODO['p0d?version=1']), g)
        self.assertIn(
            (PERIODO['p0d?version=1'], PROV.specializationOf,
             PERIODO['p0d']), g)
        self.assertIn(
            (PERIODO['p0h#change-1'], PROV.generated,
             PERIODO['p0trgkv?version=1']), g)
        self.assertIn(
            (PERIODO['p0trgkv?version=1'], PROV.specializationOf,
             PERIODO['p0trgkv']), g)
        self.assertIn(
            (PERIODO['p0h#change-1'], PROV.generated,
             PERIODO['p0trgkvwbjd?version=1']), g)
        self.assertIn(
            (PERIODO['p0trgkvwbjd?version=1'], PROV.specializationOf,
             PERIODO['p0trgkvwbjd']), g)

        # Change from first submitted patch
        self.assertIn(  # None means any
            (PERIODO['p0h#change-2'], PROV.startedAtTime, None), g)
        self.assertIn(  # None means any
            (PERIODO['p0h#change-2'], PROV.endedAtTime, None), g)
        start = g.value(subject=PERIODO['p0h#change-2'],
                        predicate=PROV.startedAtTime)
        self.assertEqual(start.datatype, XSD.dateTime)
        self.assertRegex(start.value.isoformat(), W3CDTF)
        end = g.value(subject=PERIODO['p0h#change-2'],
                      predicate=PROV.endedAtTime)
        self.assertEqual(end.datatype, XSD.dateTime)
        self.assertRegex(end.value.isoformat(), W3CDTF)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.wasAssociatedWith,
             URIRef('http://orcid.org/1234-5678-9101-112X')), g)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.wasAssociatedWith,
             URIRef('http://orcid.org/1211-1098-7654-321X')), g)
        for association in g.subjects(
                predicate=PROV.agent,
                object=URIRef('http://orcid.org/1234-5678-9101-112X')):
            role = g.value(subject=association, predicate=PROV.hadRole)
            self.assertIn(role, (PERIODO['p0v#submitted'],
                                 PERIODO['p0v#updated']))
        merger = g.value(
            predicate=PROV.agent,
            object=URIRef('http://orcid.org/1211-1098-7654-321X'))
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.qualifiedAssociation, merger), g)
        self.assertIn(
            (merger, PROV.hadRole, PERIODO['p0v#merged']), g)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.used,
             PERIODO['p0d?version=1']), g)
        self.assertIn(
            (PERIODO['p0d?version=1'], PROV.specializationOf,
             PERIODO['p0d']), g)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.used, PERIODO['p0h#patch-2']), g)
        self.assertIn(
            (PERIODO['p0h#patch-2'], FOAF.page,
             PERIODO['p0patches/2/patch.jsonpatch']), g)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.generated,
             PERIODO['p0d?version=2']), g)
        self.assertIn(
            (PERIODO['p0d?version=2'], PROV.specializationOf,
             PERIODO['p0d']), g)
        self.assertIn(
            (PERIODO['p0h#change-2'], PROV.generated,
             PERIODO['p0trgkv?version=2']), g)
        self.assertIn(
            (PERIODO['p0trgkv?version=2'], PROV.specializationOf,
             PERIODO['p0trgkv']), g)
        self.assertIn(
            (PERIODO['p0trgkv?version=2'], PROV.wasRevisionOf,
             PERIODO['p0trgkv?version=1']), g)
        entities = 0
        for _, _, version in g.triples(
                (PERIODO['p0h#change-2'], PROV.generated, None)):
            entity = g.value(subject=version, predicate=PROV.specializationOf)
            self.assertEqual(str(entity) + '?version=2', str(version))
            entities += 1
        self.assertEqual(entities, 5)
class Owler(object):
    """ Class that includes methods for building an RDF graph from an OWL
    ontology and retrieving information from it """

    def __init__(self, uri, language=""):
        super(Owler, self).__init__()
        self.rdfGraph = ConjunctiveGraph()
        try:
            self.rdfGraph.parse(uri, format="application/rdf+xml")
        except Exception:
            try:
                self.rdfGraph.parse(uri, format="n3")
            except Exception:
                raise exceptions.Error(
                    "Could not parse the file! Is it a valid RDF/OWL ontology?")
        finally:
            self.baseURI = self.__get_OntologyURI() or uri
            self.versionIRI = self.__get_versionIRI()
            self.allclasses = self.__getAllClasses(includeDomainRange=True,
                                                   includeImplicit=True,
                                                   removeBlankNodes=False,
                                                   excludeRDF_OWL=False)

    def __get_OntologyURI(self, return_as_string=True):
        test = [x for x, y, z
                in self.rdfGraph.triples((None, RDF.type, Ontology))]
        if test:
            if return_as_string:
                return str(test[0])
            else:
                return test[0]
        else:
            return None

    def __get_versionIRI(self, return_as_string=True):
        version = self.rdfGraph.value(
            self.__get_OntologyURI(return_as_string=False),
            OWLNS["versionIRI"],
            default=None)
        version = str(version) if (return_as_string and version is not None) else version
        return version

    def __getAllClasses(self, classPredicate="", includeDomainRange=False,
                        includeImplicit=False, removeBlankNodes=True,
                        addOWLThing=True, excludeRDF_OWL=True):
        rdfGraph = self.rdfGraph
        exit = {}

        def addIfYouCan(x, mydict):
            if excludeRDF_OWL:
                if x.startswith('http://www.w3.org/2002/07/owl#') or \
                   x.startswith("http://www.w3.org/1999/02/22-rdf-syntax-ns#") or \
                   x.startswith("http://www.w3.org/2000/01/rdf-schema#"):
                    return mydict
            if x not in mydict:
                mydict[x] = None
            return mydict

        if addOWLThing:
            exit = addIfYouCan(Thing, exit)

        if classPredicate == "rdfs" or classPredicate == "":
            for s in rdfGraph.subjects(RDF.type, RDFS.Class):
                exit = addIfYouCan(s, exit)
        if classPredicate == "owl" or classPredicate == "":
            for s in rdfGraph.subjects(RDF.type, Class):
                exit = addIfYouCan(s, exit)

        if includeDomainRange:
            for o in rdfGraph.objects(None, RDFS.domain):
                exit = addIfYouCan(o, exit)
            for o in rdfGraph.objects(None, RDFS.range):
                exit = addIfYouCan(o, exit)

        if includeImplicit:
            for s, v, o in rdfGraph.triples((None, RDFS.subClassOf, None)):
                exit = addIfYouCan(s, exit)
                exit = addIfYouCan(o, exit)
            for o in rdfGraph.objects(None, RDF.type):
                exit = addIfYouCan(o, exit)

        # get a list
        exit = list(exit.keys())
        if removeBlankNodes:
            exit = [x for x in exit if not isBlankNode(x)]
        return sortUriListByName(exit)

    # methods for getting ancestors and descendants of classes:
    # by default, we do not include blank nodes
    def get_classDirectSupers(self, aClass, excludeBnodes=True, sortUriName=False):
        returnlist = []
        for o in self.rdfGraph.objects(aClass, RDFS.subClassOf):
            if not (o == Thing):
                if excludeBnodes:
                    if not isBlankNode(o):
                        returnlist.append(o)
                else:
                    returnlist.append(o)
        if sortUriName:
            return sortUriListByName(removeDuplicates(returnlist))
        else:
            return removeDuplicates(returnlist)
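
# Hedged usage sketch for Owler: 'ontology.owl' is a hypothetical local file
# or URL; only attributes set in __init__ and the public method above are used.
o = Owler('ontology.owl')
print(o.baseURI, o.versionIRI)
for cls in o.allclasses:
    # direct superclasses, excluding owl:Thing and (by default) blank nodes
    print(cls, o.get_classDirectSupers(cls))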
def retrieve(request, graph):
    try:
        cg = ConjunctiveGraph().parse(data=graph, format='n3')
    except Exception:
        return not_turtle_response(graph)

    DRUG = Namespace('http://aers.data2semantics.org/resource/drug/')
    PO = Namespace('http://www.data2semantics.org/ontology/patient/')
    UMLS = Namespace('http://linkedlifedata.com/resource/umls/id/')
    LS = Namespace('http://linkedlifedata.com/resource/lifeskim/')

    cg.bind('drug', DRUG)
    cg.bind('po', PO)
    cg.bind('umls', UMLS)
    cg.bind('lifeskim', LS)

    try:
        patient = cg.value(predicate=RDF.type, object=PO['Patient'], any=False)
    except Exception:
        # More than one patient
        return multiple_patients_response(cg.serialize(format='turtle'))

    if (cg.value(predicate=PO['hasIndication'], object=UMLS['C0027947'])
            and cg.value(predicate=PO['hasMeasurement'], object=UMLS['C0015967'])):
        # We now know the patient has Febrile Neutropenia
        cg.add((patient, PO['hasIndication'], UMLS['C0746883']))

    aers_sparql = SPARQLWrapper("http://eculture2.cs.vu.nl:5020/sparql/")
    aers_sparql.setReturnFormat(JSON)
    lld_sparql = SPARQLWrapper("http://linkedlifedata.com/sparql")
    lld_sparql.setReturnFormat(JSON)

    ranking = Counter()

    # Chain generators for all values for the attributes of the patient
    features = itertools.chain(
        cg.objects(subject=patient, predicate=PO['hasIndication']),
        cg.objects(subject=patient, predicate=PO['hasMeasurement']),
        cg.objects(subject=patient, predicate=PO['usesMedication']),
        cg.objects(subject=patient, predicate=PO['hadPreviousIndication']),
        cg.objects(subject=patient, predicate=PO['hadRecentTreatment']))

    exp_features = set()
    q_part = ""
    # First get all sameAs uris for the values
    for f in features:
        if str(f).startswith('http://linkedlifedata.com'):
            exp_features.add(str(f))
        q_part += "{?altname owl:sameAs <" + f + "> .} UNION { <" + f + "> owl:sameAs ?altname .} UNION \n"
    q_part = q_part[:-8]  # strip the trailing " UNION \n"

    q = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>

        SELECT ?altname WHERE { """ + q_part + """ }
    """
    aers_sparql.setQuery(q)
    results = aers_sparql.query().convert()

    # Only query LLD for stuff that LLD knows about (saves quite some time)
    for result in results["results"]["bindings"]:
        if result["altname"]["value"].startswith('http://linkedlifedata.com'):
            exp_features.add(result["altname"]["value"])

    # Then look up the publications that mention these, and add them to a
    # tally (Counter)
    for ef in exp_features:
        q = """
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX lifeskim: <http://linkedlifedata.com/resource/lifeskim/>

            SELECT ?pubmed WHERE { ?pubmed lifeskim:mentions <""" + ef + """> . }
            LIMIT 250
        """
        lld_sparql.setQuery(q)
        results = lld_sparql.query().convert()
        for result in results["results"]["bindings"]:
            ranking[result["pubmed"]["value"]] += 1

    # Return only the 50 most frequent publications
    ranking_json = json.dumps(ranking.most_common(50))

    return HttpResponse(ranking_json, mimetype='application/json')
def handle(self, **options):
    LOGGER.debug("linking places")
    for place in models.Place.objects.filter(dbpedia__isnull=True):
        if not place.city or not place.state:
            continue

        # formulate a dbpedia place uri
        path = urllib2.quote('%s,_%s' % (_clean(place.city),
                                         _clean(place.state)))
        url = URIRef('http://dbpedia.org/resource/%s' % path)

        # attempt to get a graph from it
        graph = ConjunctiveGraph()
        try:
            LOGGER.debug("looking up %s", url)
            graph.load(url)
        except urllib2.HTTPError:
            LOGGER.exception("Error fetching %s", url)

        # if we've got three or more assertions, extract some stuff from
        # the graph and save back some info to the db; would be nice
        # to have a triple store underneath where we could persist
        # all the facts, eh?
        if len(graph) >= 3:
            place.dbpedia = url
            place.latitude = graph.value(url, geo['lat'])
            place.longitude = graph.value(url, geo['long'])
            for object in graph.objects(URIRef(url), owl['sameAs']):
                if object.startswith('http://sws.geonames.org'):
                    place.geonames = object
            place.save()
            LOGGER.info("found dbpedia resource %s", url)
        else:
            LOGGER.warning("couldn't find dbpedia resource for %s", url)
        reset_queries()

    LOGGER.info("finished looking up places in dbpedia")
    LOGGER.info("dumping place_links.json fixture")

    # so it would be nice to use django.core.serializer here
    # but it serializes everything about the model, including
    # titles that are linked to ... and this could theoretically
    # change over time, so we only preserve the facts that have
    # been harvested from dbpedia, so they can overlay over
    # the places that have been extracted during title load
    json_src = []
    places_qs = models.Place.objects.filter(dbpedia__isnull=False)
    for p in places_qs.order_by('name'):
        json_src.append(
            {
                'name': p.name,
                'dbpedia': p.dbpedia,
                'geonames': p.geonames,
                'longitude': p.longitude,
                'latitude': p.latitude,
            }
        )
        reset_queries()

    with open('core/fixtures/place_links.json', 'w') as f:
        json.dump(json_src, f, indent=2)
    LOGGER.info("finished dumping place_links.json fixture")
def handle(self, **options):
    LOGGER.debug("linking places")
    for place in models.Place.objects.filter(dbpedia__isnull=True):
        if not place.city or not place.state:
            continue

        # formulate a dbpedia place uri
        path = urllib2.quote('%s,_%s' % (_clean(place.city),
                                         _clean(place.state)))
        url = URIRef('http://dbpedia.org/resource/%s' % path)

        # attempt to get a graph from it
        graph = ConjunctiveGraph()
        try:
            LOGGER.debug("looking up %s" % url)
            graph.load(url)
        except urllib2.HTTPError as e:
            LOGGER.error(e)

        # if we've got three or more assertions, extract some stuff from
        # the graph and save back some info to the db; would be nice
        # to have a triple store underneath where we could persist
        # all the facts, eh?
        if len(graph) >= 3:
            place.dbpedia = url
            place.latitude = graph.value(url, geo['lat'])
            place.longitude = graph.value(url, geo['long'])
            for object in graph.objects(URIRef(url), owl['sameAs']):
                if object.startswith('http://sws.geonames.org'):
                    place.geonames = object
            place.save()
            LOGGER.info("found dbpedia resource %s" % url)
        else:
            LOGGER.warn("couldn't find dbpedia resource for %s" % url)
        reset_queries()

    LOGGER.info("finished looking up places in dbpedia")
    LOGGER.info("dumping place_links.json fixture")

    # so it would be nice to use django.core.serializer here
    # but it serializes everything about the model, including
    # titles that are linked to ... and this could theoretically
    # change over time, so we only preserve the facts that have
    # been harvested from dbpedia, so they can overlay over
    # the places that have been extracted during title load
    json_src = []
    places_qs = models.Place.objects.filter(dbpedia__isnull=False)
    for p in places_qs.order_by('name'):
        json_src.append({
            'name': p.name,
            'dbpedia': p.dbpedia,
            'geonames': p.geonames,
            'longitude': p.longitude,
            'latitude': p.latitude
        })
        reset_queries()

    json.dump(json_src, open('core/fixtures/place_links.json', 'w'), indent=2)
    LOGGER.info("finished dumping place_links.json fixture")
def test_remove_authority(self):
    with open(filepath('test-patch-remove-authority.json')) as f:
        patch1 = f.read()
    with self.client as client:
        res = client.patch(
            '/d/',
            data=patch1,
            content_type='application/json',
            headers={'Authorization': 'Bearer '
                     + 'NTAwNWViMTgtYmU2Yi00YWMwLWIwODQtMDQ0MzI4OWIzMzc4'})
        patch_url = urlparse(res.headers['Location']).path
        res = client.post(
            patch_url + 'merge',
            buffered=True,
            headers={'Authorization': 'Bearer '
                     + 'ZjdjNjQ1ODQtMDc1MC00Y2I2LThjODEtMjkzMmY1ZGFhYmI4'})
        self.assertEqual(res.status_code, http.client.NO_CONTENT)

        removed_entities = database.get_removed_entity_keys()
        self.assertEqual(
            removed_entities,
            set(['p0trgkv', 'p0trgkv4kxb', 'p0trgkvkhrv', 'p0trgkvwbjd']))

        res = client.get('/trgkv',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.GONE)
        res = client.get('/trgkv.json',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.GONE)
        res = client.get('/trgkv?version=0',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.NOT_FOUND)
        res = client.get('/trgkv.json?version=0',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.NOT_FOUND)
        res = client.get('/trgkv?version=1',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.OK)
        res = client.get('/trgkv.json?version=1',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.OK)

        res = client.get('/trgkvwbjd',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.GONE)
        res = client.get('/trgkvwbjd.json',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.GONE)
        res = client.get('/trgkvwbjd?version=0',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.NOT_FOUND)
        res = client.get('/trgkvwbjd.json?version=0',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.NOT_FOUND)
        res = client.get('/trgkvwbjd?version=1',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.OK)
        res = client.get('/trgkvwbjd.json?version=1',
                         headers={'Accept': 'application/json'},
                         follow_redirects=True)
        self.assertEqual(res.status_code, http.client.OK)

        res = client.get('/h.jsonld?inline-context')
        self.assertEqual(res.headers['Cache-Control'], 'public, max-age=0')
        self.assertEqual(res.headers['X-Accel-Expires'],
                         '{}'.format(cache.MEDIUM_TIME))
        g = ConjunctiveGraph()
        g.parse(format='json-ld', data=res.get_data(as_text=True))
        generated = g.value(subject=HOST['h#change-2'],
                            predicate=PROV.generated, any=False)
        self.assertEqual(generated, HOST['d?version=2'])
class KB4ITGraph:
    """
    This class creates an RDF graph based on attributes for each doc.
    It also has convenience functions to query the graph.
    """

    def __init__(self, path=None):
        """
        If no path is passed, build a graph in memory. Otherwise,
        create a persistent graph on disk.
        """
        if path is not None:
            # Create persistent Graph on disk
            self.path = path
            self.graph = ConjunctiveGraph('Sleepycat', URIRef("kb4it://"))
            graph_path = path + SEP + 'kb4it.graph'
            self.graph.store.open(graph_path)
        else:
            # Create Graph in memory
            self.graph = ConjunctiveGraph('IOMemory')

        # Assign namespaces to the Namespace Manager of this graph
        namespace_manager = NamespaceManager(ConjunctiveGraph())
        for ns in NSBINDINGS:
            namespace_manager.bind(ns, NSBINDINGS[ns])
        self.graph.namespace_manager = namespace_manager

    def __uniq_sort(self, result):
        alist = list(result)
        aset = set(alist)
        alist = list(aset)
        alist.sort()
        return alist

    def subjects(self, predicate, object):
        """
        Return a sorted list of unique subjects for a given predicate
        and object.
        """
        return self.__uniq_sort(self.graph.subjects(predicate, object))

    def predicates(self, subject=None, object=None):
        """
        Return a sorted list of unique predicates for a given subject
        and object.
        """
        return self.__uniq_sort(self.graph.predicates(subject, object))

    def objects(self, subject, predicate):
        """
        Return a sorted list of unique objects for a given subject
        and predicate.
        """
        return self.__uniq_sort(self.graph.objects(subject, predicate))

    def value(self, subject=None, predicate=None, object=None, default=None, any=True):
        """
        Return a single value for the given subject and predicate.
        """
        return self.graph.value(subject, predicate, object, default, any)

    def add_document(self, doc):
        """
        Add a new document to the graph.
        """
        subject = URIRef(doc)
        predicate = RDF['type']
        object = URIRef(KB4IT['Document'])
        self.graph.add((subject, predicate, object))

    def add_document_attribute(self, doc, attribute, value):
        """
        Add a new attribute to a document.
        """
        predicate = 'has%s' % attribute
        subject = URIRef(doc)
        predicate = KB4IT[predicate]
        object = Literal(value)
        self.graph.add((subject, predicate, object))

    def get_attributes(self):
        """
        Get all predicates except RDF.type and Title.
        """
        blacklist = set()
        blacklist.add(RDF['type'])
        blacklist.add(KB4IT['hasTitle'])
        alist = list(self.graph.predicates(None, None))
        aset = set(alist) - blacklist
        alist = list(aset)
        alist.sort()
        return alist

    def serialize(self):
        """
        Serialize the graph to pretty XML format.
        """
        return self.graph.serialize(format='pretty-xml')

    def close(self):
        """
        Close the graph if it is persistent.
        FIXME: check if it is open.
        """
        self.graph.store.close()
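
# Hedged usage sketch for KB4ITGraph: in-memory store; the document URI and
# the 'Author' attribute are hypothetical, chosen only to exercise the API above.
kb = KB4ITGraph()
kb.add_document('kb4it://docs/example')
kb.add_document_attribute('kb4it://docs/example', 'Author', 'Jane Doe')
# hasAuthor should appear here (RDF.type and hasTitle are blacklisted)
print(kb.get_attributes())
print(kb.serialize())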