Esempio n. 1
0
    def testIter(self):
        """PR 1382: iterating a Dataset directly must agree with Dataset.quads().

        Populates two named graphs (one quad is added twice on purpose) and
        checks that counting via ``quads((None, None, None))`` and counting via
        plain iteration over the dataset give the same number.
        """
        subj = URIRef("https://example.com/a")
        pred = URIRef("https://example.com/b")
        obj_c = URIRef("https://example.com/c")
        obj_d = URIRef("https://example.com/d")
        graph_1 = URIRef("https://example.com/g1")
        graph_2 = URIRef("https://example.com/g2")

        dataset = Dataset()

        dataset.add_graph(graph_1)
        dataset.add((subj, pred, obj_c, graph_1))
        # Intentional repeat of the quad above: a duplicate addition.
        dataset.add((subj, pred, obj_c, graph_1))

        dataset.add_graph(graph_2)
        dataset.add((subj, pred, obj_c, graph_2))
        # Same subject/predicate, new object (obj_d), back in the first graph.
        dataset.add((subj, pred, obj_d, graph_1))

        # Count through the traditional quads() pattern query.
        count_via_quads = sum(1 for _ in dataset.quads((None, None, None)))

        # Count through the Dataset.__iter__ protocol.
        count_via_iter = sum(1 for _ in dataset)

        self.assertEqual(count_via_iter, count_via_quads)  # both should be 3
def createNanopubs(g):
    """Build nanopublication graphs for every interaction in ``g`` and print them as TriG.

    Runs the module-level ``interactSelect`` query against ``g``. Each result
    row is read through the keys ``'inter'``, ``'s'`` and ``'o'``. For every
    row three named graphs are added to a fresh ``Dataset``:

    * a head graph typing the nanopublication and its parts and linking them,
    * an assertion graph stating that ``s`` interacts with ``o``,
    * a provenance graph recording that the assertion was derived from
      ``inter``.

    The publication-info URI is typed and linked in the head graph, but no
    publication-info graph is created by this function.

    Relies on the module-level names ``np``, ``sio``, ``w3prov`` and
    ``interactSelect`` (not defined in this block).

    Args:
        g: Object exposing ``query()``; its result rows must be indexable by
            the variable names listed above.

    Returns:
        None. The serialised dataset is written to standard output.
    """
    ds = Dataset()
    ds.namespace_manager.bind("ddi", "http://purl.org/net/nlprepository/spl-ddi-annotation-poc#")
    ds.namespace_manager.bind("prov", "http://www.w3.org/ns/prov#")
    ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")

    interactsWith = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/dikb/vocab/interactsWith')

    for b in g.query(interactSelect):
        inter = b['inter']

        # All graph names are derived from the interaction resource itself.
        npURI = URIRef(inter + "-nanopub")
        headURI = URIRef(inter + "-head")
        aURI = URIRef(inter + "-assertion")
        pubInfoURI = URIRef(inter + "-pubInfo")
        provURI = URIRef(inter + "-provenance")

        head = ds.add_graph(headURI)
        head.add((npURI, RDF.type, np['Nanopublication']))
        head.add((aURI, RDF.type, np['Assertion']))
        head.add((provURI, RDF.type, np['Provenance']))
        head.add((pubInfoURI, RDF.type, np['PublicationInfo']))
        head.add((npURI, np['hasAssertion'], aURI))
        head.add((npURI, np['hasProvenance'], provURI))
        head.add((npURI, np['hasPublicationInfo'], pubInfoURI))

        a = ds.add_graph(aURI)
        a.add((b['s'], interactsWith, b['o']))
        a.add((b['s'], RDF.type, sio["SIO_010038"]))
        a.add((b['o'], RDF.type, sio["SIO_010038"]))

        prov = ds.add_graph(provURI)
        prov.add((aURI, w3prov['wasDerivedFrom'], b['inter']))

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    # NOTE(review): whether serialize() returns text or bytes depends on the
    # installed rdflib version -- confirm the printed form is acceptable.
    print(ds.serialize(format='trig'))
Esempio n. 3
0
    def from_csvw(metadata_filepath):
        """Derive PMD catalogue metadata from a CSVW metadata file and write it as TriG.

        The file is parsed as JSON-LD. For every resource typed
        ``dcat:Dataset`` a separate metadata graph is built containing the
        fixed catalogue triples listed below, every other statement about
        the datacube re-attached to the derived PMD dataset URI, and the
        statements of any ``vcard:Individual`` the datacube points to.
        All metadata graphs are collected in one ``Dataset`` and serialised
        next to the input file.

        NOTE(review): there is no ``self``/``cls`` parameter; presumably this
        is a ``@staticmethod`` whose decorator is outside this view -- confirm.

        Args:
            metadata_filepath: Path (``str``) of the CSVW metadata file. The
                output path is this value with ``".csv-metadata.json"``
                replaced by ``".trig"``.

        Returns:
            None. The result is written to disk.
        """
        pmd_metadata = Dataset()

        # JSON documents are UTF-8; do not depend on the platform's default
        # locale encoding when decoding the file.
        with open(metadata_filepath, encoding="utf-8") as file:
            json_string = file.read()
        g = Graph().parse(data=json_string, format='json-ld')

        # Get all datacubes.
        datacubes = g.query("""
            PREFIX dcat: <http://www.w3.org/ns/dcat#>
            SELECT *
            WHERE {
                ?dataset a dcat:Dataset .
            }
            """)

        # The catalogue is the same for every datacube, so build it once.
        catalog = URIRef("http://gss-data.org.uk/catalog/datasets")

        for row in datacubes:
            # Try and find a sensible id for each dcat:Dataset specified in
            # the metadata file to derive additional URIs for PMD resources:
            # the last path segment of the datacube URI.
            datacube_uri = row[0]
            datacube_id = urlparse(datacube_uri).path.rsplit('/', 1)[-1]

            # Create sensible URIs for PMD specific resources
            graph_uri = f"http://gss-data.org.uk/graph/{datacube_id}"
            metadata_graph_uri = f"http://gss-data.org.uk/graph/{datacube_id}#metadata"
            catalog_record_uri = f"http://gss-data.org.uk/catalog/{datacube_id}"
            dataset_uri = f"http://gss-data.org.uk/data/{datacube_id}"

            metadata = Graph('IOMemory', URIRef(metadata_graph_uri))
            metadata.bind('dcat', DCAT)
            metadata.bind('dct', DCTERMS)
            metadata.bind('foaf', FOAF)
            metadata.bind('qb', QB)
            metadata.bind('pmdcat', PMDCAT)
            metadata.bind('rdf', RDF)
            metadata.bind('rdfs', RDFS)
            metadata.bind('vcard', VCARD)

            graph = URIRef(graph_uri)
            metadata_graph = URIRef(metadata_graph_uri)
            catalog_record = URIRef(catalog_record_uri)
            dataset = URIRef(dataset_uri)
            datacube = URIRef(datacube_uri)

            triples = [
                # Metadata required by PMD: ------------------------------------
                (catalog, RDF.type, DCAT.Catalog),
                (catalog, DCAT.record, catalog_record),
                (catalog_record, RDF.type, DCAT.CatalogRecord),
                (catalog_record, FOAF.primaryTopic, dataset),
                (catalog_record, PMDCAT.metadataGraph, metadata_graph),
                (dataset, RDF.type, PMDCAT.Dataset),
                (dataset, PMDCAT.datasetContents, datacube),
                (dataset, PMDCAT.graph, graph),
                (datacube, RDF.type, PMDCAT.DataCube),
            ]

            # Get metadata attached to a datacube-like object and assign it
            # to the dcat:Dataset catalog entry. ?dataset is pre-bound, so
            # each row comes back as (dataset, p, o).
            user_defined_metadata = g.query("""
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                PREFIX csvw: <http://www.w3.org/ns/csvw#>
                PREFIX qb: <http://purl.org/linked-data/cube#>
                PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>
                SELECT ?dataset ?p ?o
                WHERE {
                    {
                        ?datacube ?p ?o .
                        FILTER (?p NOT IN (
                            rdf:type, qb:structure, csvw:tableSchema, csvw:url
                        )) .
                    }
                }
                """,
                initBindings={"dataset": dataset, "datacube": datacube})

            # Statements about any vcard:Individual referenced by the datacube.
            contact_metadata = g.query("""
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>
                SELECT ?contact ?p ?o
                WHERE {
                    {
                        ?datacube ?p0 ?contact .
                        ?contact a vcard:Individual .
                        ?contact ?p ?o .
                    }
                }
                """,
                initBindings={"datacube": datacube})

            triples.extend(user_defined_metadata)
            triples.extend(contact_metadata)

            for triple in triples:
                # Skip rows whose object position is None.
                if triple[2] is not None:
                    metadata.add(triple)

            pmd_metadata.add_graph(metadata)

        # NOTE(review): if the path does not contain ".csv-metadata.json",
        # replace() returns it unchanged and the TriG output overwrites the
        # input file -- confirm callers always pass such a path.
        output_filepath = metadata_filepath.replace(".csv-metadata.json", ".trig")
        pmd_metadata.serialize(output_filepath, format="trig")
def _addIntersectionRestriction(graph, assertionURI, nodes, classURI, valueURI):
    """Add one ``rdfs:subClassOf`` restriction on ``assertionURI`` to ``graph``.

    The restriction is on property IAO_0000136 with ``owl:someValuesFrom`` an
    ``owl:intersectionOf`` class whose two-element RDF list holds ``classURI``
    and an inner restriction (property BFO_0000052, ``owl:hasValue``
    ``valueURI``).

    Args:
        graph: Graph receiving the triples.
        assertionURI: Subject that becomes a subclass of the restriction.
        nodes: Five nodes, in order: outer restriction, intersection class,
            list head, inner restriction, list tail.
        classURI: First member of the intersection list.
        valueURI: ``owl:hasValue`` filler of the inner restriction.
    """
    restriction, intersection, listHead, inner, listTail = nodes
    graph.add((restriction, RDF.type, owl.Restriction))
    graph.add((restriction, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136")))
    graph.add((intersection, RDF.type, owl.Class))
    graph.add((listHead, RDF.first, classURI))
    graph.add((listTail, RDF.first, inner))
    graph.add((listHead, RDF.rest, listTail))
    graph.add((inner, RDF.type, owl.Restriction))
    graph.add((inner, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052")))
    graph.add((inner, owl.hasValue, valueURI))
    graph.add((listTail, RDF.rest, RDF.nil))
    graph.add((intersection, owl.intersectionOf, listHead))
    graph.add((restriction, owl.someValuesFrom, intersection))
    graph.add((assertionURI, RDFS.subClassOf, restriction))


def createNanopubs(g):
    """Build nanopublication assertion graphs for the claims in ``g`` and print them as TriG.

    Runs the module-level ``interactSelect`` query against ``g`` and reads
    the ``'c'`` binding of each row. Rows whose ``'c'`` value is not a key of
    the processed-DDI CSV are skipped. For the remaining rows the CSV value
    is split on commas into predicate type, subject and object, and one named
    assertion graph is created for the predicate types ``increases_auc``,
    ``substrate_of`` and ``inhibits``; any other predicate type is ignored.
    Assertion graphs are numbered consecutively starting at 1.

    Three data files are read using paths relative to the current working
    directory. Relies on the module-level names ``np``, ``owl``, ``mp`` and
    ``interactSelect`` (not defined in this block).

    Args:
        g: Object exposing ``query()``; result rows must be indexable by
            ``'c'``.

    Returns:
        None. The serialised dataset is written to standard output.

    Raises:
        KeyError: If a subject/object has no entry in the label mapping files.
        IndexError: If a CSV value has fewer than three comma-separated fields.
    """
    ds = Dataset()
    ds.namespace_manager.bind("ddi", "http://dbmi-icode-01.dbmi.pitt.edu/mp/")
    ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")
    ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
    ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#")
    ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/")
    ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")
    ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#")
    ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/")
    ds.namespace_manager.bind("mp", "http://purl.org/mp/")

    assertionCount = 1

    # Lookup tables. Lines are split without stripping, so the dictionary
    # values keep their trailing newline; hence the strip('\n') calls below.
    # Context managers make sure the file handles are closed.
    with open('../../data/np-graphs/processed-dikb-ddis-for-nanopub.csv') as f:
        pddiD = dict(line.split(',', 1) for line in f)
    with open('../../data/chebi_mapping.txt') as f:
        cL = dict(line.split('\t') for line in f)
    with open('../../data/pro_mapping.txt') as f:
        pL = dict(line.split('\t') for line in f)

    for b in g.query(interactSelect):
        # str() yields a plain string key on both Python 2 and 3.
        claimKey = str(b['c'])
        if claimKey not in pddiD:
            continue

        claimInfo = pddiD[claimKey].split(',')
        predicateType = claimInfo[0].strip('\n')
        claimSub = claimInfo[1].strip('\n')
        claimObj = claimInfo[2].strip('\n')

        if predicateType == "increases_auc":

            aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s" % assertionCount)
            assertionCount += 1

            # NOTE(review): these blank-node ids are fixed, so every
            # increases_auc assertion reuses the same ten identifiers --
            # confirm that sharing them across graphs is intended.
            bnodes = [BNode(str(i)) for i in range(1, 11)]

            assertionLabel = cL[claimSub].strip('\n') + " - " + cL[claimObj].strip('\n') + " potential drug-drug interaction"

            a = ds.add_graph(aURI)
            a.add((aURI, RDF.type, np.assertion))
            a.add((aURI, RDF.type, owl.Class))
            a.add((aURI, RDFS.label, Literal(assertionLabel.lower())))
            a.add((aURI, RDFS.subClassOf, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000000")))
            # Two structurally identical restrictions: one for the claim's
            # subject, one for its object.
            _addIntersectionRestriction(
                a, aURI, bnodes[:5],
                URIRef("http://purl.obolibrary.org/obo/DIDEO_00000012"),
                URIRef(claimSub))
            _addIntersectionRestriction(
                a, aURI, bnodes[5:],
                URIRef("http://purl.obolibrary.org/obo/DIDEO_00000013"),
                URIRef(claimObj))

            ds.add((aURI, mp.formalizes, b['c']))
            ds.add((b['c'], mp.formalizedAs, aURI))

        elif predicateType == "substrate_of":

            aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s" % assertionCount)
            assertionCount += 1

            dLabel = cL[claimSub].strip('\n')
            eLabel = pL[claimObj].strip('\n')
            assertionLabel = dLabel + " substrate of " + eLabel

            a = ds.add_graph(aURI)
            # These four statements are added through the dataset itself,
            # not through the assertion graph ``a``.
            ds.add((aURI, RDF.type, np.assertion))
            ds.add((aURI, RDFS.label, Literal(assertionLabel.lower())))
            ds.add((aURI, mp.formalizes, b['c']))
            ds.add((b['c'], mp.formalizedAs, aURI))

            a.add((URIRef(claimObj), RDF.type, URIRef("http://purl.obolibrary.org/obo/OBI_0000427")))
            a.add((URIRef(claimObj), RDFS.label, Literal(eLabel.lower())))
            a.add((URIRef(claimObj), URIRef("http://purl.obolibrary.org/obo/DIDEO_00000096"), URIRef(claimSub)))

            a.add((URIRef(claimSub), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431")))
            a.add((URIRef(claimSub), RDFS.label, Literal(dLabel.lower())))

        elif predicateType == "inhibits":

            aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s" % assertionCount)
            assertionCount += 1

            dLabel = cL[claimSub].strip('\n')
            eLabel = pL[claimObj].strip('\n')
            assertionLabel = dLabel + " inhibits " + eLabel

            a = ds.add_graph(aURI)
            # As above, these go through the dataset rather than ``a``.
            ds.add((aURI, RDF.type, np.assertion))
            ds.add((aURI, RDFS.label, Literal(assertionLabel.lower())))
            ds.add((aURI, mp.formalizes, b['c']))
            ds.add((b['c'], mp.formalizedAs, aURI))

            a.add((URIRef(claimSub), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431")))
            a.add((URIRef(claimSub), RDFS.label, Literal(dLabel.lower())))
            a.add((URIRef(claimSub), URIRef("http://purl.obolibrary.org/obo/RO_0002449"), URIRef(claimObj)))

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(ds.serialize(format='trig'))
def createNanopubs(g):
    """Build head, publication-info and provenance graphs per assertion and print them as TriG.

    Runs the module-level ``interactSelect`` query against ``g`` and reads
    the ``'a'`` and ``'t'`` bindings of each row. The text of ``'a'`` from
    its last ``'-'`` onwards is used as the suffix of every generated URI.
    For each row:

    * the assertion URI is typed ``np.assertion`` through the dataset,
    * a head graph types and links the nanopublication parts,
    * a publication-info graph records attribution and generation time,
    * a provenance graph is added only when ``'t'`` is one of the three
      predicate IRIs in the table below; the table decides which
      ``dikbEvidence`` terms are attached with ``prov.hadMember``.

    Relies on the module-level names ``np``, ``prov``, ``dikbEvidence``,
    ``datetime`` and ``interactSelect`` (not defined in this block).

    Args:
        g: Object exposing ``query()``; result rows must be indexable by
            ``'a'`` and ``'t'``.

    Returns:
        None. The serialised dataset is written to standard output.
    """
    ds = Dataset()
    ds.namespace_manager.bind("ddi", "http://dbmi-icode-01.dbmi.pitt.edu/mp/")
    ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")
    ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
    ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#")
    ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/")
    ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")
    ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#")
    ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/")
    ds.namespace_manager.bind("mp", "http://purl.org/mp/")
    ds.namespace_manager.bind("prov", "http://www.w3.org/ns/prov#")
    ds.namespace_manager.bind("dikbEvidence", "http://dbmi-icode-01.dbmi.pitt.edu/dikb-evidence/DIKB_evidence_ontology_v1.3.owl#")

    attributedTo = URIRef('http://orcid.org/0000-0002-2993-2085')

    # Predicate IRI -> local names of the dikbEvidence terms recorded as
    # members of the assertion's provenance. Names are resolved lazily so a
    # term is only looked up when its predicate type actually occurs.
    pkDdiEvidence = ("EV_PK_DDI_RCT", "EV_PK_DDI_NR", "EV_PK_DDI_Par_Grps")
    evidenceByPredicate = {
        "http://purl.obolibrary.org/obo/DIDEO_00000000": pkDdiEvidence,
        "http://purl.obolibrary.org/obo/DIDEO_00000096":
            pkDdiEvidence + ("EV_CT_PK_Genotype", "EV_CT_PK_Phenotype"),
        "http://purl.obolibrary.org/obo/RO_0002449": pkDdiEvidence,
    }

    for b in g.query(interactSelect):

        # str() gives plain strings on both Python 2 and 3, so the predicate
        # can be used as a dictionary key alongside the string keys above.
        assertionId = str(b['a'])
        identifier = assertionId[assertionId.rfind('-'):]
        predicateType = str(b['t'])

        npURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-nanopub%s' % identifier)
        headURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-head%s' % identifier)
        pubInfoURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-pubInfo%s' % identifier)
        provURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-provenance%s' % identifier)
        aURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion%s' % identifier)

        ds.add((aURI, RDF.type, np.assertion))

        head = ds.add_graph(headURI)
        head.add((npURI, RDF.type, np['Nanopublication']))
        head.add((provURI, RDF.type, np['Provenance']))
        head.add((pubInfoURI, RDF.type, np['PublicationInfo']))
        head.add((npURI, np['hasAssertion'], aURI))
        head.add((npURI, np['hasProvenance'], provURI))
        head.add((npURI, np['hasPublicationInfo'], pubInfoURI))

        pub = ds.add_graph(pubInfoURI)
        pub.add((npURI, prov.wasAttributedTo, attributedTo))
        pub.add((npURI, prov.generatedAtTime, Literal(datetime.now())))

        evidenceNames = evidenceByPredicate.get(predicateType)
        if evidenceNames is None:
            # Unrecognised predicate type: no provenance graph is created.
            continue

        provenance = ds.add_graph(provURI)
        provenance.add((aURI, prov.wasAttributedTo, attributedTo))
        provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
        provenance.add((aURI, prov.wasDerivedFrom, Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
        for evidenceName in evidenceNames:
            provenance.add((aURI, prov.hadMember, dikbEvidence[evidenceName]))

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(ds.serialize(format='trig'))