def testIter(self):
    """PR 1382: iterating a Dataset directly yields as many quads as quads()."""
    subject = URIRef("https://example.com/a")
    predicate = URIRef("https://example.com/b")
    object_c = URIRef("https://example.com/c")
    object_d = URIRef("https://example.com/d")
    graph_1 = URIRef("https://example.com/g1")
    graph_2 = URIRef("https://example.com/g2")

    dataset = Dataset()

    dataset.add_graph(graph_1)
    dataset.add((subject, predicate, object_c, graph_1))
    # Deliberately add the very same quad a second time (a duplicate).
    dataset.add((subject, predicate, object_c, graph_1))

    dataset.add_graph(graph_2)
    dataset.add((subject, predicate, object_c, graph_2))
    # Same graph as the first quad, but with a different object (uri d).
    dataset.add((subject, predicate, object_d, graph_1))

    # Count through the explicit quads() API ...
    count_via_quads = sum(1 for _ in dataset.quads((None, None, None)))
    # ... and through Dataset.__iter__.
    count_via_iter = sum(1 for _ in dataset)

    self.assertEqual(count_via_iter, count_via_quads)  # both should be 3
def createNanopubs(g):
    """Build nanopublication graphs from ``g`` and print them as TriG.

    The module-level ``interactSelect`` query is run against ``g``. For every
    result row three named graphs are added to a fresh ``Dataset``; their
    URIs are derived from the row's ``inter`` value plus a suffix:

    * ``-head``: types the nanopub, assertion, provenance and pubInfo
      resources and links the nanopub to the other three.
    * ``-assertion``: states that ``s`` interactsWith ``o`` and types both
      as ``sio["SIO_010038"]``.
    * ``-provenance``: states the assertion wasDerivedFrom ``inter``.

    Args:
        g: Object exposing ``query()``; rows must provide ``inter``, ``s``
            and ``o``.

    Returns:
        None. The serialized dataset is written to standard output.
    """
    ds = Dataset()
    ds.namespace_manager.bind("ddi", "http://purl.org/net/nlprepository/spl-ddi-annotation-poc#")
    ds.namespace_manager.bind("prov", "http://www.w3.org/ns/prov#")
    ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")

    # Loop-invariant predicate, built once instead of once per row.
    interacts_with = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/dikb/vocab/interactsWith')

    bindings = g.query(interactSelect)
    for b in bindings:
        npURI = URIRef(b['inter'] + "-nanopub")
        headURI = URIRef(b['inter'] + "-head")
        aURI = URIRef(b['inter'] + "-assertion")
        pubInfoURI = URIRef(b['inter'] + "-pubInfo")
        provURI = URIRef(b['inter'] + "-provenance")

        # Head graph: declares the parts of the nanopub and wires them up.
        head = ds.add_graph(headURI)
        head.add((npURI, RDF.type, np['Nanopublication']))
        head.add((aURI, RDF.type, np['Assertion']))
        head.add((provURI, RDF.type, np['Provenance']))
        head.add((pubInfoURI, RDF.type, np['PublicationInfo']))
        head.add((npURI, np['hasAssertion'], aURI))
        head.add((npURI, np['hasProvenance'], provURI))
        head.add((npURI, np['hasPublicationInfo'], pubInfoURI))

        # Assertion graph: the interaction itself plus the type of each side.
        a = ds.add_graph(aURI)
        a.add((b['s'], interacts_with, b['o']))
        a.add((b['s'], RDF.type, sio["SIO_010038"]))
        a.add((b['o'], RDF.type, sio["SIO_010038"]))

        # Provenance graph: ties the assertion back to its source resource.
        prov = ds.add_graph(provURI)
        prov.add((aURI, w3prov['wasDerivedFrom'], b['inter']))

    # Call form of print: valid on Python 2 and 3 alike (the former
    # ``print x`` statement is a syntax error on Python 3).
    print(ds.serialize(format='trig'))
def from_csvw(metadata_filepath):
    """Derive PMD catalogue metadata from a CSVW metadata file; write TriG.

    The JSON-LD document at ``metadata_filepath`` is parsed and queried for
    every ``dcat:Dataset``. For each one, a named metadata graph is built
    containing:

    * the fixed catalogue / catalogue-record / dataset / datacube triples,
    * every property of the datacube except ``rdf:type``, ``qb:structure``,
      ``csvw:tableSchema`` and ``csvw:url``, re-attached to the derived
      dataset URI,
    * all properties of any ``vcard:Individual`` the datacube points at.

    The graphs are collected in one ``Dataset`` and serialized as TriG.

    Args:
        metadata_filepath: Path string of the JSON-LD metadata file. A
            trailing ``.csv-metadata.json`` is swapped for ``.trig`` to form
            the output path. If the suffix is absent, ``.trig`` is appended
            so the input file is never overwritten.

    Returns:
        None. The result is written to the derived ``.trig`` path.
    """
    pmd_metadata = Dataset()

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(metadata_filepath, encoding="utf-8") as file:
        json_string = file.read()
    g = Graph().parse(data=json_string, format='json-ld')

    # Get all datacubes.
    datacubes = g.query(
        " PREFIX dcat: <http://www.w3.org/ns/dcat#>"
        " SELECT * WHERE { ?dataset a dcat:Dataset . } "
    )

    # Query text is loop-invariant; only the initBindings change per datacube.
    user_defined_query = (
        " PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"
        " PREFIX csvw: <http://www.w3.org/ns/csvw#>"
        " PREFIX qb: <http://purl.org/linked-data/cube#>"
        " PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>"
        " SELECT ?dataset ?p ?o WHERE {"
        " { ?datacube ?p ?o ."
        " FILTER (?p NOT IN ( rdf:type, qb:structure, csvw:tableSchema, csvw:url )) ."
        " } } "
    )
    contact_query = (
        " PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"
        " PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>"
        " SELECT ?contact ?p ?o WHERE {"
        " { ?datacube ?p0 ?contact ."
        " ?contact a vcard:Individual ."
        " ?contact ?p ?o ."
        " } } "
    )

    # The catalogue URI does not depend on the datacube.
    catalog = URIRef("http://gss-data.org.uk/catalog/datasets")

    for row in datacubes:
        # Try and find a sensible id for each dcat:Dataset specified in the
        # metadata file to derive additional URIs for PMD resources.
        datacube_uri = row[0]
        datacube_id = urlparse(datacube_uri).path.rsplit('/', 1)[-1]

        # Create sensible URIs for PMD specific resources.
        graph_uri = f"http://gss-data.org.uk/graph/{datacube_id}"
        metadata_graph_uri = f"http://gss-data.org.uk/graph/{datacube_id}#metadata"
        catalog_record_uri = f"http://gss-data.org.uk/catalog/{datacube_id}"
        dataset_uri = f"http://gss-data.org.uk/data/{datacube_id}"

        metadata = Graph('IOMemory', URIRef(metadata_graph_uri))
        metadata.bind('dcat', DCAT)
        metadata.bind('dct', DCTERMS)
        metadata.bind('foaf', FOAF)
        metadata.bind('qb', QB)
        metadata.bind('pmdcat', PMDCAT)
        metadata.bind('rdf', RDF)
        metadata.bind('rdfs', RDFS)
        metadata.bind('vcard', VCARD)

        graph = URIRef(graph_uri)
        metadata_graph = URIRef(metadata_graph_uri)
        catalog_record = URIRef(catalog_record_uri)
        dataset = URIRef(dataset_uri)
        datacube = URIRef(datacube_uri)

        triples = [
            # Metadata required by PMD: ------------------------------------
            (catalog, RDF.type, DCAT.Catalog),
            (catalog, DCAT.record, catalog_record),
            (catalog_record, RDF.type, DCAT.CatalogRecord),
            (catalog_record, FOAF.primaryTopic, dataset),
            (catalog_record, PMDCAT.metadataGraph, metadata_graph),
            (dataset, RDF.type, PMDCAT.Dataset),
            (dataset, PMDCAT.datasetContents, datacube),
            (dataset, PMDCAT.graph, graph),
            (datacube, RDF.type, PMDCAT.DataCube),
        ]

        # Get metadata attached to a datacube-like object and assign it
        # to the dcat:Dataset catalog entry (?dataset is pre-bound, so each
        # result row has the derived dataset URI in its first column).
        user_defined_metadata = g.query(
            user_defined_query,
            initBindings={"dataset": dataset, "datacube": datacube},
        )
        contact_metadata = g.query(
            contact_query,
            initBindings={"datacube": datacube},
        )
        triples.extend(user_defined_metadata)
        triples.extend(contact_metadata)

        for triple in triples:
            # Skip rows whose object is None.
            if triple[2] is not None:
                metadata.add(triple)

        pmd_metadata.add_graph(metadata)

    # Swap only a *trailing* suffix. str.replace() would also rewrite the
    # suffix elsewhere in the path and, when the suffix is missing, return the
    # input path unchanged - serializing would then clobber the source file.
    suffix = ".csv-metadata.json"
    if metadata_filepath.endswith(suffix):
        output_path = metadata_filepath[:-len(suffix)] + ".trig"
    else:
        output_path = metadata_filepath + ".trig"

    pmd_metadata.serialize(output_path, format="trig")
def _add_value_restriction(graph, class_uri, nodes, member_uri, value_uri):
    """Attach one OWL restriction to ``class_uri`` inside ``graph``.

    The triples written form this pattern (``obo:`` being
    ``http://purl.obolibrary.org/obo/``)::

        class_uri rdfs:subClassOf [
            a owl:Restriction ;
            owl:onProperty obo:IAO_0000136 ;
            owl:someValuesFrom [
                a owl:Class ;
                owl:intersectionOf (
                    member_uri
                    [ a owl:Restriction ;
                      owl:onProperty obo:BFO_0000052 ;
                      owl:hasValue value_uri ]
                )
            ]
        ] .

    ``nodes`` supplies the five blank nodes to use, in the order: outer
    restriction, intersection class, first list cell, inner restriction,
    second list cell.
    """
    outer, intersection, cell1, inner, cell2 = nodes
    graph.add((outer, RDF.type, owl.Restriction))
    graph.add((outer, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136")))
    graph.add((intersection, RDF.type, owl.Class))
    graph.add((cell1, RDF.first, member_uri))
    graph.add((cell2, RDF.first, inner))
    graph.add((cell1, RDF.rest, cell2))
    graph.add((inner, RDF.type, owl.Restriction))
    graph.add((inner, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052")))
    graph.add((inner, owl.hasValue, value_uri))
    graph.add((cell2, RDF.rest, RDF.nil))
    graph.add((intersection, owl.intersectionOf, cell1))
    graph.add((outer, owl.someValuesFrom, intersection))
    graph.add((class_uri, RDFS.subClassOf, outer))


def createNanopubs(g):
    """Build assertion graphs for the claims found in ``g``; print TriG.

    Three lookup tables are loaded from fixed relative paths: claim data
    (comma separated, keyed by the first field) and two tab-separated label
    mappings. The module-level ``interactSelect`` query is then run against
    ``g``. Each row whose ``c`` value is a key of the claim table yields one
    numbered assertion graph, depending on the claim's predicate type:

    * ``increases_auc``: an OWL class with two restrictions, one per side
      of the claim.
    * ``substrate_of`` / ``inhibits``: plain typed and labelled triples
      linking the two sides of the claim.

    Rows with an unknown claim or another predicate type are skipped and
    do not consume an assertion number.

    Args:
        g: Object exposing ``query()``; rows must provide ``c``.

    Returns:
        None. The serialized dataset is written to standard output.
    """
    ds = Dataset()
    for prefix, namespace in (
        ("ddi", "http://dbmi-icode-01.dbmi.pitt.edu/mp/"),
        ("np", "http://www.nanopub.org/nschema#"),
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
        ("owl", "http://www.w3.org/2002/07/owl#"),
        ("obo", "http://purl.obolibrary.org/obo/"),
        ("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#"),
        ("xsd", "http://www.w3.org/2001/XMLSchema#"),
        ("dc", "http://purl.org/dc/elements/1.1/"),
        ("mp", "http://purl.org/mp/"),
    ):
        ds.namespace_manager.bind(prefix, namespace)

    assertionCount = 1

    # Context managers close the data files; they were previously left open.
    with open('../../data/np-graphs/processed-dikb-ddis-for-nanopub.csv') as claims_file:
        pddiD = dict(line.split(',', 1) for line in claims_file)
    with open('../../data/chebi_mapping.txt') as chebi_file:
        cL = dict(line.split('\t') for line in chebi_file)
    with open('../../data/pro_mapping.txt') as pro_file:
        pL = dict(line.split('\t') for line in pro_file)

    assertion_uri = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s")

    bindings = g.query(interactSelect)
    for b in bindings:
        # str() + ``in`` work on Python 2 and 3; ``.decode`` on text and
        # ``dict.has_key`` exist on Python 2 only.
        claim_key = str(b['c'])
        if claim_key not in pddiD:
            continue

        # Remaining fields of the claim line: predicate type, subject, object.
        # The last field still carries the line's trailing newline.
        claimInfo = pddiD[claim_key].split(',')
        claimSub = claimInfo[1].strip('\n')
        claimObj = claimInfo[2].strip('\n')
        predicateType = claimInfo[0].strip('\n')

        if predicateType == "increases_auc":
            aURI = assertion_uri % assertionCount
            assertionCount += 1

            # NOTE(review): the same ten blank-node ids ('1'..'10') are reused
            # for every assertion - confirm that sharing them is intended.
            bnodes = [BNode(str(number)) for number in range(1, 11)]

            assertionLabel = (cL[claimSub].strip('\n') + " - " + cL[claimObj].strip('\n')
                              + " potential drug-drug interaction")

            a = ds.add_graph(aURI)
            a.add((aURI, RDF.type, np.assertion))
            a.add((aURI, RDF.type, owl.Class))
            a.add((aURI, RDFS.label, Literal(assertionLabel.lower())))
            a.add((aURI, RDFS.subClassOf, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000000")))

            _add_value_restriction(
                a, aURI, bnodes[:5],
                URIRef("http://purl.obolibrary.org/obo/DIDEO_00000012"),
                URIRef(claimSub))
            _add_value_restriction(
                a, aURI, bnodes[5:],
                URIRef("http://purl.obolibrary.org/obo/DIDEO_00000013"),
                URIRef(claimObj))

            ds.add((aURI, mp.formalizes, b['c']))
            ds.add((b['c'], mp.formalizedAs, aURI))

        elif predicateType == "substrate_of":
            aURI = assertion_uri % assertionCount
            assertionCount += 1

            dLabel = cL[claimSub].strip('\n')
            eLabel = pL[claimObj].strip('\n')
            assertionLabel = dLabel + " substrate of " + eLabel

            a = ds.add_graph(aURI)
            ds.add((aURI, RDF.type, np.assertion))
            ds.add((aURI, RDFS.label, Literal(assertionLabel.lower())))
            ds.add((aURI, mp.formalizes, b['c']))
            ds.add((b['c'], mp.formalizedAs, aURI))

            a.add((URIRef(claimObj), RDF.type, URIRef("http://purl.obolibrary.org/obo/OBI_0000427")))
            a.add((URIRef(claimObj), RDFS.label, Literal(eLabel.lower())))
            a.add((URIRef(claimObj), URIRef("http://purl.obolibrary.org/obo/DIDEO_00000096"), URIRef(claimSub)))
            a.add((URIRef(claimSub), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431")))
            a.add((URIRef(claimSub), RDFS.label, Literal(dLabel.lower())))

        elif predicateType == "inhibits":
            aURI = assertion_uri % assertionCount
            assertionCount += 1

            dLabel = cL[claimSub].strip('\n')
            eLabel = pL[claimObj].strip('\n')
            assertionLabel = dLabel + " inhibits " + eLabel

            a = ds.add_graph(aURI)
            ds.add((aURI, RDF.type, np.assertion))
            ds.add((aURI, RDFS.label, Literal(assertionLabel.lower())))
            ds.add((aURI, mp.formalizes, b['c']))
            ds.add((b['c'], mp.formalizedAs, aURI))

            a.add((URIRef(claimSub), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431")))
            a.add((URIRef(claimSub), RDFS.label, Literal(dLabel.lower())))
            a.add((URIRef(claimSub), URIRef("http://purl.obolibrary.org/obo/RO_0002449"), URIRef(claimObj)))

    # Call form of print: valid on Python 2 and 3 alike.
    print(ds.serialize(format='trig'))
def createNanopubs(g):
    """Build head, pubInfo and provenance graphs per assertion; print TriG.

    The module-level ``interactSelect`` query is run against ``g``. For each
    row, the text of ``a`` from its last ``-`` onwards is appended to fixed
    URI stems to name the nanopub, head, pubInfo, provenance and assertion
    resources. Each row then produces:

    * a default-graph triple typing the assertion as ``np.assertion``,
    * a head graph declaring the nanopub and linking its parts,
    * a pubInfo graph with attribution and generation time,
    * a provenance graph, only when ``t`` is one of three known predicate
      URIs, with attribution, generation time, a derivation note and the
      evidence members listed for that predicate.

    Args:
        g: Object exposing ``query()``; rows must provide ``a`` and ``t``.

    Returns:
        None. The serialized dataset is written to standard output.
    """
    ds = Dataset()
    for prefix, namespace in (
        ("ddi", "http://dbmi-icode-01.dbmi.pitt.edu/mp/"),
        ("np", "http://www.nanopub.org/nschema#"),
        ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        ("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
        ("owl", "http://www.w3.org/2002/07/owl#"),
        ("obo", "http://purl.obolibrary.org/obo/"),
        ("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#"),
        ("xsd", "http://www.w3.org/2001/XMLSchema#"),
        ("dc", "http://purl.org/dc/elements/1.1/"),
        ("mp", "http://purl.org/mp/"),
        ("prov", "http://www.w3.org/ns/prov#"),
        ("dikbEvidence", "http://dbmi-icode-01.dbmi.pitt.edu/dikb-evidence/DIKB_evidence_ontology_v1.3.owl#"),
    ):
        ds.namespace_manager.bind(prefix, namespace)

    # dikbEvidence term names recorded as prov:hadMember, per predicate URI.
    # Names are resolved lazily so a term is only looked up when it is used.
    shared_evidence = ("EV_PK_DDI_RCT", "EV_PK_DDI_NR", "EV_PK_DDI_Par_Grps")
    evidence_by_predicate = {
        "http://purl.obolibrary.org/obo/DIDEO_00000000": shared_evidence,
        "http://purl.obolibrary.org/obo/DIDEO_00000096":
            shared_evidence + ("EV_CT_PK_Genotype", "EV_CT_PK_Phenotype"),
        "http://purl.obolibrary.org/obo/RO_0002449": shared_evidence,
    }
    author = URIRef('http://orcid.org/0000-0002-2993-2085')

    bindings = g.query(interactSelect)
    for b in bindings:
        # str() works on Python 2 and 3; ``.decode`` on text is Python 2 only.
        assertion_text = str(b['a'])
        # Keeps the leading '-' so it can be appended straight to the stems.
        identifier = assertion_text[assertion_text.rfind('-'):]
        # Plain string, so it can be compared and used as a dict key.
        predicateType = str(b['t'])

        npURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-nanopub%s') % identifier
        headURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-head%s') % identifier
        pubInfoURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-pubInfo%s') % identifier
        provURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-provenance%s') % identifier
        aURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion%s') % identifier

        ds.add((aURI, RDF.type, np.assertion))

        head = ds.add_graph(headURI)
        head.add((npURI, RDF.type, np['Nanopublication']))
        head.add((provURI, RDF.type, np['Provenance']))
        head.add((pubInfoURI, RDF.type, np['PublicationInfo']))
        head.add((npURI, np['hasAssertion'], aURI))
        head.add((npURI, np['hasProvenance'], provURI))
        head.add((npURI, np['hasPublicationInfo'], pubInfoURI))

        pub = ds.add_graph(pubInfoURI)
        pub.add((npURI, prov.wasAttributedTo, author))
        pub.add((npURI, prov.generatedAtTime, Literal(datetime.now())))

        evidence_terms = evidence_by_predicate.get(predicateType)
        if evidence_terms is not None:
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, author))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom, Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            for term in evidence_terms:
                provenance.add((aURI, prov.hadMember, getattr(dikbEvidence, term)))

    # Call form of print: valid on Python 2 and 3 alike.
    print(ds.serialize(format='trig'))