def dump_as_rdf(g: Dataset, table_name: str) -> bool:
    """ Dump the contents of Graph g in RDF turtle

    :param g: Dataset to dump
    :param table_name: name of the base table
    :return: success indicator
    """
    # Propagate the mapped concepts up the tree
    def add_to_ancestors(s: URIRef, vm: URIRef):
        g.add((s, ISO['enumeratedConceptualDomain.hasMember'], vm))
        for parent in g.objects(s, SKOS.broader):
            add_to_ancestors(parent, vm)

    if COMPUTE_MEMBERS and EXPLICIT_MEMBERS:
        for subj, obj in g.subject_objects(SKOS.exactMatch):
            add_to_ancestors(subj, obj)
        # TODO: this gives us a list of all concepts in the scheme... useful?
        for scheme, tc in g.subject_objects(SKOS.hasTopConcept):
            for member in g.objects(tc, ISO['enumeratedConceptualDomain.hasMember']):
                g.add((scheme, ISO['enumeratedConceptualDomain.hasMember'], member))

    for name, ns in namespaces.items():
        g.bind(name.lower(), ns)
    outfile = os.path.join(DATA_DIR, table_name + '.ttl')
    print(f"Saving output to {outfile}")
    g.serialize(outfile, format='turtle')
    print(f"{len(g)} triples written")
    return True
def get_fragment(request, subject, predicate, obj, page, graph):
    fragment = Dataset()
    tpf_url = urlparse(request.build_absolute_uri())
    tpf_url = TPF_URL.format(tpf_url.scheme, tpf_url.netloc, graph)

    licenses = []
    neo_licenses = LicenseModel.nodes.filter(graph__exact=graph)
    if subject and subject.startswith(LICENSE_SUBJECT_PREFIX):
        license_id = subject.split('/')[-1]
        # filter() returns a new NodeSet; the original discarded this result
        neo_licenses = neo_licenses.filter(hashed_sets__exact=license_id)

    for neo_license in neo_licenses:
        license_object = ObjectFactory.objectLicense(neo_license)
        license_object = license_object.to_json()
        license_object['compatible_licenses'] = []
        for compatible_neo_license in neo_license.followings.all():
            compatible_license = ObjectFactory.objectLicense(compatible_neo_license)
            license_object['compatible_licenses'].append(compatible_license.hash())
        licenses.append(license_object)

    rdf_licenses = get_rdf(licenses, graph).triples((subject, predicate, obj))

    total_nb_triples = 0
    for s, p, o in rdf_licenses:
        fragment.add((s, p, o))
        total_nb_triples += 1

    last_result = True
    nb_triple_per_page = total_nb_triples
    _frament_fill_meta(subject, predicate, obj, page, graph, fragment,
                       last_result, total_nb_triples, nb_triple_per_page,
                       request, tpf_url)
    return fragment
def testIter(self):
    """PR 1382: adds __iter__ to Dataset"""
    d = Dataset()

    uri_a = URIRef("https://example.com/a")
    uri_b = URIRef("https://example.com/b")
    uri_c = URIRef("https://example.com/c")
    uri_d = URIRef("https://example.com/d")

    d.add_graph(URIRef("https://example.com/g1"))
    d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))
    # pointless addition: duplicates the quad above
    d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))

    d.add_graph(URIRef("https://example.com/g2"))
    d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g2")))
    d.add((uri_a, uri_b, uri_d, URIRef("https://example.com/g1")))  # new, uri_d

    # traditional iterator
    i_trad = 0
    for t in d.quads((None, None, None)):
        i_trad += 1

    # new Dataset.__iter__ iterator
    i_new = 0
    for t in d:
        i_new += 1

    self.assertEqual(i_new, i_trad)  # both should be 3
class Fragment(object):
    HYDRA = Namespace("http://www.w3.org/ns/hydra/core#")
    VOID = Namespace("http://rdfs.org/ns/void#")
    FOAF = Namespace("http://xmlns.com/foaf/0.1/")
    DCTERMS = Namespace("http://purl.org/dc/terms/")

    def __init__(self):
        self.rdf_graph = Dataset()

    def add_data_triple(self, subject, predicate, obj):
        self.rdf_graph.add((subject, predicate, obj))

    def add_graph(self, identifier):
        self.rdf_graph.graph(identifier)

    def add_meta_quad(self, graph, subject, predicate, obj):
        # rdflib expects quads ordered (s, p, o, g); the original tuple order
        # (graph, subject, predicate, obj) would have treated `graph` as the subject
        self.rdf_graph.add((subject, predicate, obj, graph))

    def add_prefix(self, prefix, uri):
        self.rdf_graph.bind(prefix, uri)

    def serialize(self):
        return self.rdf_graph.serialize(format="trig", encoding="utf-8")
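# Usage sketch for the Fragment class above (not part of the original source;
# the EX namespace and all values below are illustrative assumptions). Data
# triples land in the default graph, metadata quads in a named graph, and
# serialize() emits the whole Dataset as TriG bytes.
from rdflib import Namespace, Literal

EX = Namespace("http://example.com/")

f = Fragment()
f.add_prefix("ex", EX)
f.add_graph(EX["meta"])

# A data triple in the default graph
f.add_data_triple(EX["s"], EX["p"], Literal("a fragment triple"))

# A metadata quad placed in the named graph EX["meta"]
f.add_meta_quad(EX["meta"], EX["s"], Fragment.VOID["triples"], Literal(1))

print(f.serialize().decode("utf-8"))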
def proc_table_access_row(queries: QueryTexts, ta: TableAccess, g: Dataset) -> int:
    """ Process a table_access entry

    :param queries: query cache and tables access
    :param ta: table_access entry
    :param g: target Graph
    :return: number of ontology entries processed (0 unless the table is concept_dimension)
    """
    name_parts = ta.c_fullname.split('\\')[2:-1]  # Skip leading and trailing slashes
    concept_scheme = ACT['/'.join(name_parts[:2])]
    concept_scheme_version = ACT['/'.join(name_parts[:3])]
    g.add((concept_scheme, RDF.type, SKOS.ConceptScheme))
    g.add((concept_scheme, RDF.type, OWL.Ontology))
    g.add((concept_scheme, OWL.versionIRI, concept_scheme_version))
    return proc_ontology_table(queries, ta.c_table_name, concept_scheme, ta.c_fullname, g) \
        if ta.c_table_name == 'concept_dimension' else 0
def evaluate_ontology_entry(queries: QueryTexts, te: OntologyEntry, cid: URIRef, g: Dataset) -> None:
    """ Execute the OntologyEntry row in te and get the resulting set of fact table keys

    :param queries: QueryTexts instance
    :param te: OntologyEntry instance to look up
    :param cid: parent concept identifier
    :param g: Graph to add entries to
    """
    column_name, codes, exacts = get_te_valueset(queries, te)
    if codes:
        if EXPLICIT_MEMBERS:
            g.add((cid, RDF.type, ISO.EnumeratedConceptualDomain))
        if not COMPUTE_MEMBERS and EXPLICIT_MEMBERS:
            for code in codes:
                if is_valid_code(code):
                    g.add((cid, ISO['enumeratedConceptualDomain.hasMember'], code_to_uri(code)))
        for exact in exacts:
            if is_valid_code(exact):
                g.add((cid, SKOS.exactMatch, code_to_uri(exact)))
def data_structure_definition(profile, dataset_name, dataset_base_uri,
                              variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a nanopublication
    with provenance annotations) that contains the data structure definition
    (from the DataCube vocabulary) and the mappings to external datasets.

    Arguments:
    dataset_name -- the name of the dataset
    variables    -- the list of dictionaries with the variables and their mappings to URIs
    profile      -- the Google signin profile
    source_path  -- the path to the dataset file that was annotated
    source_hash  -- the Git hash of the dataset file version of the dataset

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace('{}/'.format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a conjunctive graph for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind('qbrv', QBRV)
    rdf_dataset.bind('qbr', QBR)
    rdf_dataset.bind('qb', QB)
    rdf_dataset.bind('skos', SKOS)
    rdf_dataset.bind('prov', PROV)
    rdf_dataset.bind('np', NP)
    rdf_dataset.bind('foaf', FOAF)

    # Initialize the graphs needed for the nanopublication
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + '/' + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE['assertion/' + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE['provenance/' + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE['pubinfo/' + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR['person/' + profile['email']]

    rdf_dataset.add((author_uri, RDF.type, FOAF['Person']))
    rdf_dataset.add((author_uri, FOAF['name'], Literal(profile['name'])))
    rdf_dataset.add((author_uri, FOAF['email'], Literal(profile['email'])))
    rdf_dataset.add((author_uri, QBRV['googleId'], Literal(profile['id'])))
    try:
        rdf_dataset.add((author_uri, FOAF['depiction'], URIRef(profile['image'])))
    except KeyError:
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV['path'],
                     Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV['sha1_hash'],
                     Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE['nanopublication/' + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP['Nanopublication']))
    rdf_dataset.add((nanopublication_uri, NP['hasAssertion'], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP['Assertion']))
    rdf_dataset.add((nanopublication_uri, NP['hasProvenance'], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP['Provenance']))
    rdf_dataset.add((nanopublication_uri, NP['hasPublicationInfo'], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP['PublicationInfo']))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure definition itself)
    provenance_graph.add((assertion_graph_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    provenance_graph.add((dataset_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    provenance_graph.add((assertion_graph_uri, PROV['generatedAtTime'],
                          Literal(timestamp, datatype=XSD.datetime)))
    provenance_graph.add((assertion_graph_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather than an activity itself.
    qber_uri = URIRef('https://github.com/CLARIAH/qber.git')

    pubinfo_graph.add((nanopublication_uri, PROV['wasGeneratedBy'], qber_uri))
    pubinfo_graph.add((nanopublication_uri, PROV['generatedAtTime'],
                       Literal(timestamp, datatype=XSD.datetime)))
    pubinfo_graph.add((nanopublication_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The assertion graph
    # ----
    structure_uri = BASE['structure']

    assertion_graph.add((dataset_uri, RDF.type, QB['DataSet']))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add((structure_uri, RDF.type, QB['DataStructureDefinition']))
    assertion_graph.add((dataset_uri, QB['structure'], structure_uri))

    for variable_id, variable in variables.items():
        variable_uri = URIRef(variable['original']['uri'])
        variable_label = Literal(variable['original']['label'])
        variable_type = URIRef(variable['type'])

        codelist_uri = URIRef(variable['codelist']['original']['uri'])
        codelist_label = Literal(variable['codelist']['original']['label'])

        # The variable as component of the definition
        component_uri = safe_url(BASE, 'component/' + variable['original']['label'])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB['component'], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if 'description' in variable and variable['description'] != "":
            assertion_graph.add((variable_uri, RDFS.comment, Literal(variable['description'])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable['uri'] != str(variable_uri):
            assertion_graph.add((variable_uri, RDFS['subPropertyOf'], URIRef(variable['uri'])))

        if variable_type == QB['DimensionProperty']:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['dimension'], variable_uri))

            # Coded variables are also of type coded property (a subproperty of dimension property)
            if variable['category'] == 'coded':
                assertion_graph.add((variable_uri, RDF.type, QB['CodedProperty']))
        elif variable_type == QB['MeasureProperty']:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['measure'], variable_uri))
        elif variable_type == QB['AttributeProperty']:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['attribute'], variable_uri))

        # If this variable is of category 'coded', we add codelist and URIs for
        # each variable (including mappings between value uris and etc....)
        if variable['category'] == 'coded':
            assertion_graph.add((codelist_uri, RDF.type, SKOS['Collection']))
            assertion_graph.add((codelist_uri, RDFS.label, Literal(codelist_label)))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB['codeList'], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if variable['codelist']['uri'] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV['wasDerivedFrom'],
                                     URIRef(variable['codelist']['uri'])))

            # Generate a SKOS concept for each of the values and map it to the
            # assigned codelist
            for value in variable['values']:
                value_uri = URIRef(value['original']['uri'])
                value_label = Literal(value['original']['label'])

                assertion_graph.add((value_uri, RDF.type, SKOS['Concept']))
                assertion_graph.add((value_uri, SKOS['prefLabel'], Literal(value_label)))
                assertion_graph.add((codelist_uri, SKOS['member'], value_uri))

                # The value has been changed, and therefore there is a mapping
                if value['original']['uri'] != value['uri']:
                    assertion_graph.add((value_uri, SKOS['exactMatch'], URIRef(value['uri'])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value['label'])))

        elif variable['category'] == 'identifier':
            # Generate a SKOS concept for each of the values
            for value in variable['values']:
                value_uri = URIRef(value['original']['uri'])
                value_label = Literal(value['original']['label'])

                assertion_graph.add((value_uri, RDF.type, SKOS['Concept']))
                assertion_graph.add((value_uri, SKOS['prefLabel'], value_label))

                # The value has been changed, and therefore there is a mapping
                if value['original']['uri'] != value['uri']:
                    assertion_graph.add((value_uri, SKOS['exactMatch'], URIRef(value['uri'])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value['label'])))

        elif variable['category'] == 'other':
            # Generate a literal for each of the values when converting the dataset (but not here)
            pass

    return rdf_dataset
def proc_ontology_table(queries: QueryTexts, table_name: str, concept_scheme: URIRef,
                        basename: str, g: Dataset) -> int:
    """ Process the entries in ontology table, table

    :param queries: QueryText instance
    :param table_name: table to process
    :param concept_scheme: Owning concept scheme
    :param basename: Root name of table -- used to strip the front part of the full name path
    :param g: Graph to record assertions
    :return: Number processed
    """
    te: OntologyEntry
    table = queries.tables[table_name.lower()]
    nentries = 0
    q = queries.ont_session.query(table) if not NUM_CODES else \
        queries.ont_session.query(table).order_by(table.c.c_fullname)
    for te in q.all():
        parent, ccode = proc_fullname(basename, te.c_fullname)
        if ccode:
            cid = ACT[ccode]
            g.add((cid, RDF.type, SKOS.Concept))
            g.add((cid, SKOS.inScheme, concept_scheme))
            g.add((cid, SKOS.prefLabel, Literal(te.c_name)))
            if te.c_basecode:
                g.add((cid, SKOS.editorialNote, Literal(te.c_basecode)))
            if te.c_tooltip:
                tip = ', '.join([e for e in te.c_tooltip.split('\\') if e])
                g.add((cid, SKOS.scopeNote, Literal(tip)))
            if parent:
                g.add((cid, SKOS.broader, ACT[parent]))
            else:
                g.add((concept_scheme, SKOS.hasTopConcept, cid))
            # Some sort of PyCharm debugger issue here...
            va = VisualAttributes(te.c_visualattributes)
            if va.draggable:
                evaluate_ontology_entry(queries, te, cid, g)
            nentries += 1
            if NUM_CODES and nentries >= NUM_CODES:
                break
    return nentries
def createNanopubs(g):
    ds = Dataset()
    ds.namespace_manager.bind("ddi", "http://dbmi-icode-01.dbmi.pitt.edu/mp/")
    ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")
    ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
    ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#")
    ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/")
    ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")
    ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#")
    ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/")
    ds.namespace_manager.bind("mp", "http://purl.org/mp/")
    ds.namespace_manager.bind("prov", "http://www.w3.org/ns/prov#")
    ds.namespace_manager.bind("dikbEvidence",
                              "http://dbmi-icode-01.dbmi.pitt.edu/dikb-evidence/DIKB_evidence_ontology_v1.3.owl#")

    bindings = g.query(interactSelect)
    for b in bindings:
        asIndex = b['a'].decode('utf-8').rfind('-')
        identifier = b['a'].decode('utf-8')[asIndex:]
        predicateType = b['t'].decode('utf-8')

        # Interpolate the identifier *before* constructing the URIRef;
        # applying % to a URIRef (as the original did) yields a plain string.
        npURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-nanopub%s' % identifier)
        headURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-head%s' % identifier)
        pubInfoURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-pubInfo%s' % identifier)
        provURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-provenance%s' % identifier)
        aURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion%s' % identifier)

        ds.add((aURI, RDF.type, np.assertion))

        head = ds.add_graph(headURI)
        head.add((npURI, RDF.type, np['Nanopublication']))
        head.add((provURI, RDF.type, np['Provenance']))
        head.add((pubInfoURI, RDF.type, np['PublicationInfo']))
        head.add((npURI, np['hasAssertion'], aURI))
        head.add((npURI, np['hasProvenance'], provURI))
        head.add((npURI, np['hasPublicationInfo'], pubInfoURI))

        pub = ds.add_graph(pubInfoURI)
        pub.add((npURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
        pub.add((npURI, prov.generatedAtTime, Literal(datetime.now())))

        if predicateType == "http://purl.obolibrary.org/obo/DIDEO_00000000":
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom,
                            Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps))

        elif predicateType == "http://purl.obolibrary.org/obo/DIDEO_00000096":
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom,
                            Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_CT_PK_Genotype))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_CT_PK_Phenotype))

        elif predicateType == "http://purl.obolibrary.org/obo/RO_0002449":
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom,
                            Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps))

    print ds.serialize(format='trig')
r1.title = "Foo" i1_1 = Ingredient() i1_1.name = "Foo_1" t1 = Tag("Baz") r1.ingredients.append(i1_1) r1.tags.append(t1) r1.add_prov("wasDerivedFrom", URIRef("http://recipes.com/r/Foo")) r1.add_pub_info("wasAttributedTo", Literal("Jeff the Data Guy")) summed = Dataset() for quad in r1.__publish__(): summed.add(quad) summed.namespace_manager.bind("np", data.NP, True) summed.namespace_manager.bind("recipe-kb", data.BASE, True) summed.namespace_manager.bind("prov", data.PROV, True) print(summed.serialize(format="trig").decode("utf-8")) u1 = data.USDAEntry(12345, "CHEESE,SERIOUSLY SPICY", []) l1 = data.Linkage(data.IngredientName(i1_1.name), u1) summed = Dataset() for quad in l1.__publish__(): summed.add(quad)
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'urn:michel')
        self.tarek = URIRef(u'urn:tarek')
        self.bob = URIRef(u'urn:bob')
        self.likes = URIRef(u'urn:likes')
        self.hates = URIRef(u'urn:hates')
        self.pizza = URIRef(u'urn:pizza')
        self.cheese = URIRef(u'urn:cheese')

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef(u'urn:context-1')
        self.c2 = URIRef(u'urn:context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            pass
        else:
            if os.path.isdir(self.tmppath):
                shutil.rmtree(self.tmppath)
            else:
                os.remove(self.tmppath)

    def testGraphAware(self):
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")

        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def testGraphAware(self):
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # added graph exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # graph still exists, although empty
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
# prev_label = self.get_label()
# print "hash value: %08x" % hash
# subprocess.Popen(cmd_string, shell=True, stdin=None,
#                  stdout=None, stderr=None, close_fds=True)
# time.sleep(1)
# container_id = self.get_containerID(image_name)
# print 'CID:' + container_id
# print 'commit'

# Generate URIs for the docker instance
dockerEntityuuid = str(uuid.uuid4())
dockerActivityuuid = str(uuid.uuid4())
### WARNING WARNING this needs to be generated and stored in the config.
dockerUseruuid = str(uuid.uuid4())

# Add entity triples first
ds.add((UUIDNS[dockerEntityuuid], RDF.type, PROV.Entity))
ds.add((UUIDNS[dockerEntityuuid], RDF.type, DOCKER.Entity))
ds.add((UUIDNS[dockerEntityuuid], PROV.wasGeneratedBy, UUIDNS[dockerActivityuuid]))
ds.add((UUIDNS[dockerEntityuuid], DOCKER.hasImageID, Literal("ImageIDString")))

# Add activity triples next, including the computational activity.
ds.add((UUIDNS[dockerActivityuuid], RDF.type, PROV.Activity))
# Need subclass of docker-related activities
ds.add((UUIDNS[dockerActivityuuid], RDF.type, CA.compuatationalActivity))
ds.add((UUIDNS[dockerActivityuuid], RDF.type, DOCKER.commitActivity))
ds.add((UUIDNS[dockerActivityuuid], DOCKER.hasCommand, DOCKER.commitOperation))
ds.add((UUIDNS[dockerActivityuuid], DOCKER.hasContainerID, Literal("blahID")))
ds.add((UUIDNS[dockerActivityuuid], DOCKER.hasContainerTag, Literal("GreatContainer")))
ds.add((UUIDNS[dockerActivityuuid], PROV.startedAtTime,
        # the original snippet is cut off here; a timestamp literal is the
        # presumed object of prov:startedAtTime
        Literal(datetime.datetime.now())))
def createNanopubs(g):
    ds = Dataset()
    ds.namespace_manager.bind("ddi", "http://dbmi-icode-01.dbmi.pitt.edu/mp/")
    ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")
    ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
    ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#")
    ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/")
    ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")
    ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#")
    ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/")
    ds.namespace_manager.bind("mp", "http://purl.org/mp/")

    assertionCount = 1
    enzymeCount = 1

    pddiD = dict([line.split(',', 1) for line in open('../../data/np-graphs/processed-dikb-ddis-for-nanopub.csv')])
    cL = dict([line.split('\t') for line in open('../../data/chebi_mapping.txt')])
    pL = dict([line.split('\t') for line in open('../../data/pro_mapping.txt')])
    substrateD = {}
    inhibitorD = {}

    bindings = g.query(interactSelect)
    for b in bindings:
        if pddiD.has_key(str(b['c'].decode('utf-8'))):
            tempClaim = pddiD[str(b['c'].decode('utf-8'))]
            claimInfo = tempClaim.split(',')
            claimSub = claimInfo[1]
            claimObj = claimInfo[2]
            predicateType = claimInfo[0].strip('\n')

            if predicateType == "increases_auc":
                # Interpolate before constructing the URIRef; applying % to a
                # URIRef (as the original did) yields a plain string.
                aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s" % assertionCount)
                assertionCount += 1

                bn1 = BNode('1')
                bn2 = BNode('2')
                bn3 = BNode('3')
                bn4 = BNode('4')
                bn5 = BNode('5')
                bn6 = BNode('6')
                bn7 = BNode('7')
                bn8 = BNode('8')
                bn9 = BNode('9')
                bn10 = BNode('10')

                assertionLabel = cL[claimSub.strip('\n')].strip('\n') + " - " + \
                    cL[claimObj.strip('\n')].strip('\n') + " potential drug-drug interaction"

                a = ds.add_graph(aURI)
                a.add((aURI, RDF.type, np.assertion))
                a.add((aURI, RDF.type, owl.Class))
                a.add((aURI, RDFS.label, Literal(assertionLabel.lower())))
                a.add((aURI, RDFS.subClassOf, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000000")))
                a.add((bn1, RDF.type, owl.Restriction))
                a.add((bn1, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136")))
                a.add((bn2, RDF.type, owl.Class))
                a.add((bn3, RDF.first, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000012")))
                a.add((bn5, RDF.first, bn4))
                a.add((bn3, RDF.rest, bn5))
                a.add((bn4, RDF.type, owl.Restriction))
                a.add((bn4, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052")))
                a.add((bn4, owl.hasValue, URIRef(claimSub.strip('\n'))))
                a.add((bn5, RDF.rest, RDF.nil))
                a.add((bn2, owl.intersectionOf, bn3))
                a.add((bn1, owl.someValuesFrom, bn2))
                a.add((aURI, RDFS.subClassOf, bn1))
                a.add((bn6, RDF.type, owl.Restriction))
                a.add((bn6, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136")))
                a.add((bn7, RDF.type, owl.Class))
                a.add((bn8, RDF.first, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000013")))
                a.add((bn10, RDF.first, bn9))
                a.add((bn8, RDF.rest, bn10))
                a.add((bn9, RDF.type, owl.Restriction))
                a.add((bn9, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052")))
                a.add((bn9, owl.hasValue, URIRef(claimObj.strip('\n'))))
                a.add((bn10, RDF.rest, RDF.nil))
                a.add((bn7, owl.intersectionOf, bn8))
                a.add((bn6, owl.someValuesFrom, bn7))
                a.add((aURI, RDFS.subClassOf, bn6))

                ds.add((aURI, mp.formalizes, b['c']))
                ds.add((b['c'], mp.formalizedAs, aURI))

            elif predicateType == "substrate_of":
                aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s" % assertionCount)
                assertionCount += 1

                dLabel = cL[claimSub.strip('\n')].strip('\n')
                eLabel = pL[claimObj.strip('\n')].strip('\n')
                assertionLabel = dLabel + " substrate of " + eLabel

                a = ds.add_graph(aURI)
                ds.add((aURI, RDF.type, np.assertion))
                ds.add((aURI, RDFS.label, Literal(assertionLabel.lower())))
                ds.add((aURI, mp.formalizes, b['c']))
                ds.add((b['c'], mp.formalizedAs, aURI))

                a.add((URIRef(claimObj.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/OBI_0000427")))
                a.add((URIRef(claimObj.strip('\n')), RDFS.label, Literal(eLabel.lower())))
                a.add((URIRef(claimObj.strip('\n')), URIRef("http://purl.obolibrary.org/obo/DIDEO_00000096"),
                       URIRef(claimSub.strip('\n'))))
                a.add((URIRef(claimSub.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431")))
                a.add((URIRef(claimSub.strip('\n')), RDFS.label, Literal(dLabel.lower())))

            elif predicateType == "inhibits":
                aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s" % assertionCount)
                assertionCount += 1

                dLabel = cL[claimSub.strip('\n')].strip('\n')
                eLabel = pL[claimObj.strip('\n')].strip('\n')
                assertionLabel = dLabel + " inhibits " + eLabel

                a = ds.add_graph(aURI)
                ds.add((aURI, RDF.type, np.assertion))
                ds.add((aURI, RDFS.label, Literal(assertionLabel.lower())))
                ds.add((aURI, mp.formalizes, b['c']))
                ds.add((b['c'], mp.formalizedAs, aURI))

                a.add((URIRef(claimSub.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431")))
                a.add((URIRef(claimSub.strip('\n')), RDFS.label, Literal(dLabel.lower())))
                a.add((URIRef(claimSub.strip('\n')), URIRef("http://purl.obolibrary.org/obo/RO_0002449"),
                       URIRef(claimObj.strip('\n'))))

    print ds.serialize(format='trig')
    'icpc2p': 'http://purl.bioontology.org/ontology/ICPC2P/'
}

if __name__ == '__main__':
    df = pd.read_excel('data/neurodkg_triples.xlsx', sheet_name='Clean')
    dataset = Dataset()

    for index, row in df.iterrows():
        subj = row['Subject'].replace(' ', '')
        pred = row['Predicate']
        obj = row['Object']
        obj_id = row['Concept ID']

        # if the predicate is targetGroup: remove the characters
        # (space, >, <, -, _, ;, :) from the object names
        if pred == 'targetGroup':
            obj = re.sub(r'\ |\>|\<|\-|\_|\;|\:', '', obj)
            # create triples containing subject (neurodkg instances), predicate
            # (several are defined above), and object (neurodkg instances),
            # and add them to the dataset
            dataset.add((NEURO_INST[subj], URIRef(predicate_to_uri[pred]), NEURO_INST[obj]))

        # object id: differentiating between the cases of having a disease ID or not
        elif str(obj_id) != 'nan':
            print(obj_id)
            curie = obj_id.replace(' ', '').split(':')
            if len(curie) <= 1:
                print(obj_id)
            prefix = curie[0].lower()
            obj_id = curie[1]
            print(curie)
            # if a disease ID was found, then add the ID and ontology as object of the triple
            # obj_uri = BASE[prefix+':'+obj_id]
            obj_uri = URIRef(prefix_dict[prefix] + obj_id)
            dataset.add((NEURO_INST[subj], URIRef(predicate_to_uri[pred]), obj_uri))
        # if there was no disease ID in an ontology: use the disease label as object of the triple
def result_to_dataset(result):
    ds = Dataset()
    for q in result.bindings:
        ds.add((q[Variable('s')], q[Variable('p')],
                q[Variable('o')], q[Variable('g')]))
    return ds
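# Usage sketch for result_to_dataset (assumed setup, not from the original
# project): run a SPARQL SELECT that binds ?s ?p ?o ?g, then rebuild a
# Dataset from the result bindings.
from rdflib import Dataset, URIRef, Literal, Variable

src = Dataset()
g1 = URIRef("http://example.com/g1")
src.add((URIRef("http://example.com/s"),
         URIRef("http://example.com/p"),
         Literal("o"),
         g1))

result = src.query("SELECT ?s ?p ?o ?g WHERE { GRAPH ?g { ?s ?p ?o } }")
ds = result_to_dataset(result)
print(len(ds))  # expect 1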
            websiteBool = False
    else:
        if 'http' in row['straat']:
            website = Literal(row['straat'], datatype=XSD['string'])
        elif 'http' in row['internet']:
            website = Literal(row['internet'], datatype=XSD['string'])
        elif 'http' in row['plaats_2']:
            website = Literal(row['plaats_2'], datatype=XSD['string'])
        else:
            websiteBool = False
else:
    website = Literal(row['Website'], datatype=XSD['string'])

dataset.add((thing, RDFS['label'], name))
dataset.add((thing, DBP['latitude'], lat))
dataset.add((thing, DBP['longitude'], lng))
dataset.add((thing, WGS['lat'], latw))
dataset.add((thing, WGS['long'], lngw))
dataset.add((thing, GEO['hasGeometry'], thinggeo))
dataset.add((thinggeo, RDF['type'], GEO['Geometry']))
dataset.add((thinggeo, GEO['asWKT'], point))
dataset.add((thing, SCHEMA['postalCode'], pcode))
dataset.add((thing, VCARD['hasPostalCode'], pcode))
dataset.add((thing, DBO['PostalCode'], pcode))
dataset.add((thing, SCHEMA['PostalAddress'], addr))
dataset.add((thing, VCARD['hasAddress'], addr))
dataset.add((thing, DBO['address'], addr))

# possibly for BAG
if short[i] != "toe":
# Create an empty Dataset
d = Dataset()

# Add a namespace prefix to it, just like for Graph
d.bind("ex", Namespace("http://example.com/"))

# Declare a Graph URI to be used to identify a Graph
graph_1 = URIRef("http://example.com/graph-1")

# Add an empty Graph, identified by graph_1, to the Dataset
d.graph(identifier=graph_1)

# Add two quads to Graph graph_1 in the Dataset
d.add((
    URIRef("http://example.com/subject-x"),
    URIRef("http://example.com/predicate-x"),
    Literal("Triple X"),
    graph_1
))
d.add((
    URIRef("http://example.com/subject-z"),
    URIRef("http://example.com/predicate-z"),
    Literal("Triple Z"),
    graph_1
))

# Add another quad to the Dataset to a non-existent Graph:
# the Graph is created automatically
d.add((
    URIRef("http://example.com/subject-y"),
    URIRef("http://example.com/predicate-y"),
    Literal("Triple Y"),
    # the original snippet breaks off here; a fresh graph URI such as the
    # following is the natural completion
    URIRef("http://example.com/graph-2")
))
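# Follow-up sketch (not in the original snippet): serialize the Dataset in a
# quad-aware format such as TriG, and walk its quads to verify the contents.
print(d.serialize(format="trig"))

for s, p, o, g in d.quads((None, None, None, None)):
    print(g, s, p, o)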
    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE['assertion/' + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE['provenance/' + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE['pubinfo/' + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR['person/[email protected]']

    rdf_dataset.add((author_uri, RDF.type, FOAF['Person']))
    rdf_dataset.add((author_uri, FOAF['name'], Literal('Kathrin Dentler')))
    rdf_dataset.add((author_uri, FOAF['email'], Literal('*****@*****.**')))
    rdf_dataset.add((author_uri, FOAF['depiction'], URIRef('http://www.dentler.org/kathrin.jpg')))
    # rdf_dataset.add((author_uri, QBRV['googleId'], Literal(profile['id'])))

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV['path'],
                     Literal(pathtofile, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV['sha1_hash'],
                     Literal(source_hash, datatype=XSD.string)))

    # ----