Example 1
def dump_as_rdf(g: Dataset, table_name: str) -> bool:
    """
    Dump the contents of Dataset g as RDF Turtle
    :param g: Dataset to dump
    :param table_name: name of the base table
    :return: success indicator
    """

    # Propagate the mapped concepts up the tree
    def add_to_ancestors(s: URIRef, vm: URIRef):
        g.add((s, ISO['enumeratedConceptualDomain.hasMember'], vm))
        for parent in g.objects(s, SKOS.broader):
            add_to_ancestors(parent, vm)

    if COMPUTE_MEMBERS and EXPLICIT_MEMBERS:
        for subj, obj in g.subject_objects(SKOS.exactMatch):
            add_to_ancestors(subj, obj)
        # TODO: this gives us a list of all concepts in the scheme... useful?
        for scheme, tc in g.subject_objects(SKOS.hasTopConcept):
            for member in g.objects(
                    tc, ISO['enumeratedConceptualDomain.hasMember']):
                g.add((scheme, ISO['enumeratedConceptualDomain.hasMember'],
                       member))

    for name, ns in namespaces.items():
        g.bind(name.lower(), ns)
    outfile = os.path.join(DATA_DIR, table_name + '.ttl')
    print(f"Saving output to {outfile}")
    g.serialize(outfile, format='turtle')
    print(f"{len(g)} triples written")
    return True
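
A minimal call sketch for dump_as_rdf; DATA_DIR, namespaces, COMPUTE_MEMBERS, EXPLICIT_MEMBERS and the ISO namespace are module-level settings of the original project, and the table name below is only a placeholder:

g = Dataset()
# ... populate g with the SKOS concepts and skos:exactMatch links for the table ...
if dump_as_rdf(g, "concept_dimension"):
    print("Turtle dump complete")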
def get_fragment(request, subject, predicate, obj, page, graph):
    fragment = Dataset()
    tpf_url = urlparse(request.build_absolute_uri())
    tpf_url = TPF_URL.format(tpf_url.scheme, tpf_url.netloc, graph)
    licenses = []
    neo_licenses = LicenseModel.nodes.filter(graph__exact=graph)
    if subject and subject.startswith(LICENSE_SUBJECT_PREFIX):
        license_id = subject.split('/')[-1]
        neo_licenses.filter(hashed_sets__exact=license_id)
    for neo_license in neo_licenses:
        license_object = ObjectFactory.objectLicense(neo_license)
        license_object = license_object.to_json()
        license_object['compatible_licenses'] = []
        for compatible_neo_license in neo_license.followings.all():
            compatible_license = ObjectFactory.objectLicense(
                compatible_neo_license)
            license_object['compatible_licenses'].append(
                compatible_license.hash())
        licenses.append(license_object)
    rdf_licenses = get_rdf(licenses, graph).triples((subject, predicate, obj))
    total_nb_triples = 0
    for s, p, o in rdf_licenses:
        fragment.add((s, p, o))
        total_nb_triples += 1
    last_result = True
    nb_triple_per_page = total_nb_triples
    _frament_fill_meta(subject, predicate, obj, page, graph, fragment,
                       last_result, total_nb_triples, nb_triple_per_page,
                       request, tpf_url)
    return fragment
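
The TPF_URL constant used above is assumed to be a module-level format string with three positional placeholders (scheme, host, graph name); the path below is a placeholder, not necessarily the project's actual route:

TPF_URL = "{}://{}/ldf/{}"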
Example 3
    def testIter(self):
        """PR 1382: adds __iter__ to Dataset"""
        d = Dataset()
        uri_a = URIRef("https://example.com/a")
        uri_b = URIRef("https://example.com/b")
        uri_c = URIRef("https://example.com/c")
        uri_d = URIRef("https://example.com/d")

        d.add_graph(URIRef("https://example.com/g1"))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")
               ))  # pointless addition: duplicates above

        d.add_graph(URIRef("https://example.com/g2"))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g2")))
        d.add((uri_a, uri_b, uri_d,
               URIRef("https://example.com/g1")))  # new, uri_d

        # traditional iterator
        i_trad = 0
        for t in d.quads((None, None, None)):
            i_trad += 1

        # new Dataset.__iter__ iterator
        i_new = 0
        for t in d:
            i_new += 1

        self.assertEqual(i_new, i_trad)  # both should be 3
Example 4
class Fragment(object):

    HYDRA = Namespace("http://www.w3.org/ns/hydra/core#")
    VOID = Namespace("http://rdfs.org/ns/void#")
    FOAF = Namespace("http://xmlns.com/foaf/0.1/")
    DCTERMS = Namespace("http://purl.org/dc/terms/")

    def __init__(self):
        self.rdf_graph = Dataset()

    def add_data_triple(self, subject, predicate, obj):
        self.rdf_graph.add((subject, predicate, obj))

    def add_graph(self, identifier):
        self.rdf_graph.graph(identifier)

    def add_meta_quad(self, graph, subject, predicate, obj):
        self.rdf_graph.add((graph, subject, predicate, obj))

    def add_prefix(self, prefix, uri):
        self.rdf_graph.bind(prefix, uri)

    def serialize(self):
        return self.rdf_graph.serialize(format="trig", encoding="utf-8")
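
A minimal usage sketch for the Fragment class above (the URIs are hypothetical placeholders; URIRef and Literal are assumed to be imported from rdflib alongside Dataset and Namespace):

from rdflib import URIRef, Literal

fragment = Fragment()
fragment.add_prefix("hydra", Fragment.HYDRA)
fragment.add_graph(URIRef("http://example.org/fragment#metadata"))
fragment.add_data_triple(URIRef("http://example.org/s"),
                         URIRef("http://example.org/p"),
                         Literal("o"))
print(fragment.serialize().decode("utf-8"))  # TriG output (bytes -> str)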
Example 5
def proc_table_access_row(queries: QueryTexts, ta: TableAccess,
                          g: Dataset) -> int:
    """
    Process a table_access entry
    :param queries: query cache and tables access
    :param ta: table_access entry
    :param g: target Graph
    :return: number of entries processed (0 unless the table is concept_dimension)
    """
    name_parts = ta.c_fullname.split('\\')[
        2:-1]  # Skip leading and trailing slashes
    concept_scheme = ACT['/'.join(name_parts[:2])]
    concept_scheme_version = ACT['/'.join(name_parts[:3])]
    g.add((concept_scheme, RDF.type, SKOS.ConceptScheme))
    g.add((concept_scheme, RDF.type, OWL.Ontology))
    g.add((concept_scheme, OWL.versionIRI, concept_scheme_version))
    return proc_ontology_table(
        queries, ta.c_table_name, concept_scheme, ta.c_fullname,
        g) if ta.c_table_name == 'concept_dimension' else 0
Example 6
def evaluate_ontology_entry(queries: QueryTexts, te: OntologyEntry,
                            cid: URIRef, g: Dataset) -> None:
    """
    Execute the OntologyEntry row in te and get the resulting set of fact table keys
    :param queries: QueryTexts instance
    :param te: OntologyEntry instance to look up
    :param cid: parent concept identifier
    :param g: Graph to add entries to
    :return:
    """
    column_name, codes, exacts = get_te_valueset(queries, te)
    if codes:
        if EXPLICIT_MEMBERS:
            g.add((cid, RDF.type, ISO.EnumeratedConceptualDomain))
        if not COMPUTE_MEMBERS and EXPLICIT_MEMBERS:
            for code in codes:
                if is_valid_code(code):
                    g.add((cid, ISO['enumeratedConceptualDomain.hasMember'],
                           code_to_uri(code)))
        for exact in exacts:
            if is_valid_code(exact):
                g.add((cid, SKOS.exactMatch, code_to_uri(exact)))
Example 7
def data_structure_definition(profile, dataset_name, dataset_base_uri,
                              variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a nanopublication with provenance annotations)
    that contains the data structure definition (from the DataCube vocabulary) and
    the mappings to external datasets.

    Arguments:
    dataset_name -- the name of the dataset
    variables   -- the list of dictionaries with the variables and their mappings to URIs
    profile     -- the Google signin profile
    source_path -- the path to the dataset file that was annotated
    source_hash -- the Git hash of the dataset file version of the dataset

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace('{}/'.format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a conjunctive graph for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind('qbrv', QBRV)
    rdf_dataset.bind('qbr', QBR)
    rdf_dataset.bind('qb', QB)
    rdf_dataset.bind('skos', SKOS)
    rdf_dataset.bind('prov', PROV)
    rdf_dataset.bind('np', NP)
    rdf_dataset.bind('foaf', FOAF)

    # Initialize the graphs needed for the nanopublication
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + '/' + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE['assertion/' + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE['provenance/' + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE['pubinfo/' + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR['person/' + profile['email']]

    rdf_dataset.add((author_uri, RDF.type, FOAF['Person']))
    rdf_dataset.add((author_uri, FOAF['name'], Literal(profile['name'])))
    rdf_dataset.add((author_uri, FOAF['email'], Literal(profile['email'])))
    rdf_dataset.add((author_uri, QBRV['googleId'], Literal(profile['id'])))
    try:
        rdf_dataset.add(
            (author_uri, FOAF['depiction'], URIRef(profile['image'])))
    except KeyError:
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV['path'],
                     Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV['sha1_hash'],
                     Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE['nanopublication/' + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP['Nanopublication']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasAssertion'], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP['Assertion']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasProvenance'], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP['Provenance']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasPublicationInfo'], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP['PublicationInfo']))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure definition itself)
    provenance_graph.add(
        (assertion_graph_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    provenance_graph.add(
        (dataset_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    provenance_graph.add((assertion_graph_uri, PROV['generatedAtTime'],
                          Literal(timestamp, datatype=XSD.datetime)))
    provenance_graph.add(
        (assertion_graph_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather than an activity itself.
    qber_uri = URIRef('https://github.com/CLARIAH/qber.git')

    pubinfo_graph.add((nanopublication_uri, PROV['wasGeneratedBy'], qber_uri))
    pubinfo_graph.add((nanopublication_uri, PROV['generatedAtTime'],
                       Literal(timestamp, datatype=XSD.datetime)))
    pubinfo_graph.add(
        (nanopublication_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The assertion graph
    # ----

    structure_uri = BASE['structure']

    assertion_graph.add((dataset_uri, RDF.type, QB['DataSet']))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add(
        (structure_uri, RDF.type, QB['DataStructureDefinition']))

    assertion_graph.add((dataset_uri, QB['structure'], structure_uri))

    for variable_id, variable in variables.items():
        variable_uri = URIRef(variable['original']['uri'])
        variable_label = Literal(variable['original']['label'])
        variable_type = URIRef(variable['type'])

        codelist_uri = URIRef(variable['codelist']['original']['uri'])
        codelist_label = Literal(variable['codelist']['original']['label'])

        # The variable as component of the definition
        component_uri = safe_url(BASE,
                                 'component/' + variable['original']['label'])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB['component'], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if 'description' in variable and variable['description'] != "":
            assertion_graph.add(
                (variable_uri, RDFS.comment, Literal(variable['description'])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable['uri'] != str(variable_uri):
            assertion_graph.add(
                (variable_uri, RDFS['subPropertyOf'], URIRef(variable['uri'])))

        if variable_type == QB['DimensionProperty']:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['dimension'], variable_uri))

            # Coded variables are also of type coded property (a subproperty of dimension property)
            if variable['category'] == 'coded':
                assertion_graph.add(
                    (variable_uri, RDF.type, QB['CodedProperty']))

        elif variable_type == QB['MeasureProperty']:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['measure'], variable_uri))
        elif variable_type == QB['AttributeProperty']:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['attribute'], variable_uri))

        # If this variable is of category 'coded', we add the codelist and URIs for
        # each of its values (including mappings between value URIs, etc.)
        if variable['category'] == 'coded':
            assertion_graph.add((codelist_uri, RDF.type, SKOS['Collection']))
            assertion_graph.add(
                (codelist_uri, RDFS.label, Literal(codelist_label)))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB['codeList'], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if variable['codelist']['uri'] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV['wasDerivedFrom'],
                                     URIRef(variable['codelist']['uri'])))

            # Generate a SKOS concept for each of the values and map it to the
            # assigned codelist
            for value in variable['values']:
                value_uri = URIRef(value['original']['uri'])
                value_label = Literal(value['original']['label'])

                assertion_graph.add((value_uri, RDF.type, SKOS['Concept']))
                assertion_graph.add(
                    (value_uri, SKOS['prefLabel'], Literal(value_label)))
                assertion_graph.add((codelist_uri, SKOS['member'], value_uri))

                # The value has been changed, and therefore there is a mapping
                if value['original']['uri'] != value['uri']:
                    assertion_graph.add(
                        (value_uri, SKOS['exactMatch'], URIRef(value['uri'])))
                    assertion_graph.add(
                        (value_uri, RDFS.label, Literal(value['label'])))

        elif variable['category'] == 'identifier':
            # Generate a SKOS concept for each of the values
            for value in variable['values']:
                value_uri = URIRef(value['original']['uri'])
                value_label = Literal(value['original']['label'])

                assertion_graph.add((value_uri, RDF.type, SKOS['Concept']))
                assertion_graph.add(
                    (value_uri, SKOS['prefLabel'], value_label))

                # The value has been changed, and therefore there is a mapping
                if value['original']['uri'] != value['uri']:
                    assertion_graph.add(
                        (value_uri, SKOS['exactMatch'], URIRef(value['uri'])))
                    assertion_graph.add(
                        (value_uri, RDFS.label, Literal(value['label'])))

        elif variable['category'] == 'other':
            # Generate a literal for each of the values when converting the dataset (but not here)
            pass

    return rdf_dataset
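
A hypothetical invocation sketch for data_structure_definition; every value below is a placeholder, and QBRV, QBR, QB, NP, PROV, FOAF and safe_url are assumed to be defined in the surrounding module:

profile = {"email": "author@example.org", "name": "A. Author", "id": "1234567890"}
variables = {}  # empty: only the nanopublication skeleton is produced
ds = data_structure_definition(profile, "demo dataset",
                               "http://example.org/demo",
                               variables, "data/demo.csv",
                               "0123456789abcdef")
print(ds.serialize(format="trig"))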
Example 8
def proc_ontology_table(queries: QueryTexts, table_name: str,
                        concept_scheme: URIRef, basename: str,
                        g: Dataset) -> int:
    """
    Process the entries in the ontology table named by table_name
    :param queries: QueryTexts instance
    :param table_name: table to process
    :param concept_scheme: Owning concept scheme
    :param basename: Root name of table -- used to strip the front part of the full name path
    :param g: Graph to record assertions
    :return: Number processed
    """
    te: OntologyEntry
    table = queries.tables[table_name.lower()]
    nentries = 0
    q = queries.ont_session.query(
        table) if not NUM_CODES else queries.ont_session.query(table).order_by(
            table.c.c_fullname)
    for te in q.all():
        parent, ccode = proc_fullname(basename, te.c_fullname)
        if ccode:
            cid = ACT[ccode]
            g.add((cid, RDF.type, SKOS.Concept))
            g.add((cid, SKOS.inScheme, concept_scheme))
            g.add((cid, SKOS.prefLabel, Literal(te.c_name)))
            if te.c_basecode:
                g.add((cid, SKOS.editorialNote, Literal(te.c_basecode)))
            if te.c_tooltip:
                tip = ', '.join([e for e in te.c_tooltip.split('\\') if e])
                g.add((cid, SKOS.scopeNote, Literal(tip)))
            if parent:
                g.add((cid, SKOS.broader, ACT[parent]))
            else:
                g.add((concept_scheme, SKOS.hasTopConcept, cid))
            # Some sort of PyCharm debugger issue here...
            va = VisualAttributes(te.c_visualattributes)
            if va.draggable:
                evaluate_ontology_entry(queries, te, cid, g)
            nentries += 1
            if NUM_CODES and nentries >= NUM_CODES:
                break
    return nentries
def createNanopubs(g):
		
	ds = Dataset()
	ds.namespace_manager.bind("ddi","http://dbmi-icode-01.dbmi.pitt.edu/mp/")
	ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")
	ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
	ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
	ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#")
	ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/")
	ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")
	ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#")
	ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/")
	ds.namespace_manager.bind("mp", "http://purl.org/mp/")
	ds.namespace_manager.bind("prov", "http://www.w3.org/ns/prov#")
	ds.namespace_manager.bind("dikbEvidence", "http://dbmi-icode-01.dbmi.pitt.edu/dikb-evidence/DIKB_evidence_ontology_v1.3.owl#")
	
	bindings = g.query(interactSelect)
	for b in bindings:
	
		asIndex = b['a'].decode('utf-8').rfind('-')		   
		identifier = b['a'].decode('utf-8')[asIndex:]
		predicateType = b['t'].decode('utf-8')

		npURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-nanopub%s' % identifier)
		headURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-head%s' % identifier)
		pubInfoURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-pubInfo%s' % identifier)
		provURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-provenance%s' % identifier)
		aURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion%s' % identifier)

		ds.add(( aURI, RDF.type, np.assertion))
		
		head = ds.add_graph(headURI)
		head.add((npURI, RDF.type, np['Nanopublication']))
		head.add((provURI, RDF.type, np['Provenance']))
		head.add((pubInfoURI, RDF.type, np['PublicationInfo']))
		head.add((npURI, np['hasAssertion'], aURI))
		head.add((npURI, np['hasProvenance'], provURI))
		head.add((npURI, np['hasPublicationInfo'], pubInfoURI))

		pub = ds.add_graph(pubInfoURI)
		pub.add((npURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
		pub.add((npURI, prov.generatedAtTime, Literal(datetime.now()) ))
		
		if(predicateType == "http://purl.obolibrary.org/obo/DIDEO_00000000"):

			provenance = ds.add_graph(provURI)
			provenance.add(( aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
			provenance.add(( aURI, prov.generatedAtTime, Literal(datetime.now()) ))
			provenance.add(( aURI, prov.wasDerivedFrom, Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT ))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR ))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps ))						 
					
		elif(predicateType == "http://purl.obolibrary.org/obo/DIDEO_00000096"):

			provenance = ds.add_graph(provURI)
			provenance.add(( aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
			provenance.add(( aURI, prov.generatedAtTime, Literal(datetime.now()) ))
			provenance.add(( aURI, prov.wasDerivedFrom, Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT ))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR ))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps )) 
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_CT_PK_Genotype ))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_CT_PK_Phenotype )) 
					
		elif(predicateType == "http://purl.obolibrary.org/obo/RO_0002449"):

			provenance = ds.add_graph(provURI)
			provenance.add(( aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
			provenance.add(( aURI, prov.generatedAtTime, Literal(datetime.now()) ))
			provenance.add(( aURI, prov.wasDerivedFrom, Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT ))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR ))
			provenance.add(( aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps )) 
						
	print ds.serialize(format='trig')
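
createNanopubs relies on module-level namespace helpers (np, prov, dikbEvidence) that are not shown; a sketch of the assumed definitions, with the URIs taken from the bind() calls above:

from rdflib import Namespace

np = Namespace("http://www.nanopub.org/nschema#")
prov = Namespace("http://www.w3.org/ns/prov#")
dikbEvidence = Namespace("http://dbmi-icode-01.dbmi.pitt.edu/dikb-evidence/DIKB_evidence_ontology_v1.3.owl#")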
Example 10
r1.title = "Foo"

i1_1 = Ingredient()
i1_1.name = "Foo_1"

t1 = Tag("Baz")

r1.ingredients.append(i1_1)
r1.tags.append(t1)

r1.add_prov("wasDerivedFrom", URIRef("http://recipes.com/r/Foo"))
r1.add_pub_info("wasAttributedTo", Literal("Jeff the Data Guy"))
summed = Dataset()

for quad in r1.__publish__():
    summed.add(quad)

summed.namespace_manager.bind("np", data.NP, True)
summed.namespace_manager.bind("recipe-kb", data.BASE, True)
summed.namespace_manager.bind("prov", data.PROV, True)

print(summed.serialize(format="trig").decode("utf-8"))

u1 = data.USDAEntry(12345, "CHEESE,SERIOUSLY SPICY", [])

l1 = data.Linkage(data.IngredientName(i1_1.name), u1)

summed = Dataset()

for quad in l1.__publish__():
    summed.add(quad)
Example 11
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'urn:michel')
        self.tarek = URIRef(u'urn:tarek')
        self.bob = URIRef(u'urn:bob')
        self.likes = URIRef(u'urn:likes')
        self.hates = URIRef(u'urn:hates')
        self.pizza = URIRef(u'urn:pizza')
        self.cheese = URIRef(u'urn:cheese')

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef(u'urn:context-1')
        self.c2 = URIRef(u'urn:context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            pass
        else:
            if os.path.isdir(self.tmppath):
                shutil.rmtree(self.tmppath)
            else:
                os.remove(self.tmppath)

    def testGraphAware(self):

        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)),
                         [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
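
A minimal sketch of how such a parametrised test case is typically instantiated for a concrete store; the subclass name is an assumption, and the SPARQLUpdateStore variant would additionally need HOST and DB defined at module level:

class SQLiteDatasetTestCase(DatasetTestCase):
    store = "SQLite"

if __name__ == "__main__":
    unittest.main()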
Example 12
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)


    def testGraphAware(self): 
        if not self.graph.store.graph_aware: return 
        
        g = self.graph
        g1 = g.graph(self.c1)
        
        
        # added graph exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()), 
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty 
        self.assertEquals(len(g1), 0)
        
        g1.add( (self.tarek, self.likes, self.pizza) )

        # added graph still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()), 
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEquals(len(g1), 1)

        g1.remove( (self.tarek, self.likes, self.pizza) )

        # added graph is empty 
        self.assertEquals(len(g1), 0)

        # graph still exists, although empty
        self.assertEquals(set(x.identifier for x in self.graph.contexts()), 
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)
                
        # graph is gone
        self.assertEquals(set(x.identifier for x in self.graph.contexts()), 
                          set([DATASET_DEFAULT_GRAPH_ID]))
        
    def testDefaultGraph(self): 
        
        self.graph.add(( self.tarek, self.likes, self.pizza))
        self.assertEquals(len(self.graph), 1)
        # only default exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()), 
                          set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEquals(len(self.graph), 0)
        # default still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()), 
                          set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self): 
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), 
                         [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
Example 13
#prev_label = self.get_label()
#print "hash value: %08x" % hash
#subprocess.Popen(cmd_string, shell=True, stdin=None, stdout=None, stderr=None, close_fds=True)
#time.sleep(1)
#container_id = self.get_containerID(image_name)
#print 'CID:' + container_id
#print 'commit'

# Generate URI's for docker instance
dockerEntityuuid = str(uuid.uuid4())
dockerActivityuuid = str(uuid.uuid4())
### WARNING WARNING this needs to be generated and stored in the config.
dockerUseruuid = str(uuid.uuid4())

# Add entity triples first
ds.add((UUIDNS[dockerEntityuuid], RDF.type, PROV.Entity))
ds.add((UUIDNS[dockerEntityuuid], RDF.type, DOCKER.Entity))
ds.add((UUIDNS[dockerEntityuuid], PROV.wasGeneratedBy,
        UUIDNS[dockerActivityuuid]))
ds.add((UUIDNS[dockerEntityuuid], DOCKER.hasImageID, Literal("ImageIDString")))

# Add activity triples next including computational activity.
ds.add((UUIDNS[dockerActivityuuid], RDF.type, PROV.Activity))
ds.add((UUIDNS[dockerActivityuuid], RDF.type, CA.compuatationalActivity))
# Need subclass of docker related activities
ds.add((UUIDNS[dockerActivityuuid], RDF.type, DOCKER.commitActivity))
ds.add((UUIDNS[dockerActivityuuid], DOCKER.hasCommand, DOCKER.commitOperation))
ds.add((UUIDNS[dockerActivityuuid], DOCKER.hasContainerID, Literal("blahID")))
ds.add((UUIDNS[dockerActivityuuid], DOCKER.hasContainerTag,
        Literal("GreatContainer")))
ds.add((UUIDNS[dockerActivityuuid], PROV.startedAtTime,
def createNanopubs(g):
		
	ds = Dataset()
	ds.namespace_manager.bind("ddi","http://dbmi-icode-01.dbmi.pitt.edu/mp/")
	ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")
	ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
	ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
	ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#")
	ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/")
	ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")
	ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#")
	ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/")
	ds.namespace_manager.bind("mp", "http://purl.org/mp/")

	assertionCount = 1
	enzymeCount = 1

	pddiD = dict([line.split(',',1) for line in open('../../data/np-graphs/processed-dikb-ddis-for-nanopub.csv')])
	cL = dict([line.split('\t') for line in open('../../data/chebi_mapping.txt')])
	pL = dict([line.split('\t') for line in open('../../data/pro_mapping.txt')])
	substrateD = {}
	inhibitorD = {}
			
	bindings = g.query(interactSelect)
	for b in bindings:

		if( pddiD.has_key(str(b['c'].decode('utf-8'))) ):
			tempClaim = pddiD[ str(b['c'].decode('utf-8')) ]
			claimInfo = tempClaim.split(',')
			claimSub = claimInfo[1]
			claimObj = claimInfo[2]
			predicateType = claimInfo[0].strip('\n')
				
			if(predicateType == "increases_auc"):

				aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s" % assertionCount)
				assertionCount += 1
			
				bn1 = BNode('1')
				bn2 = BNode('2')
				bn3 = BNode('3')
				bn4 = BNode('4')
				bn5 = BNode('5')
				bn6 = BNode('6')
				bn7 = BNode('7')
				bn8 = BNode('8')
				bn9 = BNode('9')
				bn10 = BNode('10')

				assertionLabel = cL[claimSub.strip('\n')].strip('\n') + " - " + cL[claimObj.strip('\n')].strip('\n') + " potential drug-drug interaction"

				a = ds.add_graph((aURI))
				a.add(( aURI, RDF.type, np.assertion))
				a.add(( aURI, RDF.type, owl.Class))
				a.add(( aURI, RDFS.label, (Literal(assertionLabel.lower()))))	 
				a.add(( aURI, RDFS.subClassOf, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000000")))
				a.add(( bn1, RDF.type, owl.Restriction))
				a.add(( bn1, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136")))
				a.add(( bn2, RDF.type, owl.Class))
				a.add(( bn3, RDF.first, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000012")))
				a.add(( bn5, RDF.first, bn4))
				a.add(( bn3, RDF.rest, bn5))
				a.add(( bn4, RDF.type, owl.Restriction))
				a.add(( bn4, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052")))
				a.add(( bn4, owl.hasValue, URIRef(claimSub.strip('\n'))))
				a.add(( bn5, RDF.rest, RDF.nil))
				a.add(( bn2, owl.intersectionOf, bn3))
				a.add(( bn1, owl.someValuesFrom, bn2))
				a.add(( aURI, RDFS.subClassOf, bn1))
				a.add(( bn6, RDF.type, owl.Restriction))
				a.add(( bn6, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136")))
				a.add(( bn7, RDF.type, owl.Class))
				a.add(( bn8, RDF.first, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000013")))
				a.add(( bn10, RDF.first, bn9))
				a.add(( bn8, RDF.rest, bn10))
				a.add(( bn9, RDF.type, owl.Restriction))
				a.add(( bn9, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052")))
				a.add(( bn9, owl.hasValue, URIRef(claimObj.strip('\n'))))
				a.add(( bn10, RDF.rest, RDF.nil))
				a.add(( bn7, owl.intersectionOf, bn8))
				a.add(( bn6, owl.someValuesFrom, bn7))
				a.add(( aURI, RDFS.subClassOf, bn6))

				ds.add(( aURI, mp.formalizes, b['c']))
				ds.add(( b['c'], mp.formalizedAs, aURI))
				
			elif(predicateType == "substrate_of"):
						
				aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s" % assertionCount)
				assertionCount += 1
				
				dLabel = cL[claimSub.strip('\n')].strip('\n')
				eLabel = pL[claimObj.strip('\n')].strip('\n')
				assertionLabel = dLabel + " substrate of " + eLabel

				a = ds.add_graph((aURI))
				ds.add(( aURI, RDF.type, np.assertion))
				ds.add(( aURI, RDFS.label, Literal(assertionLabel.lower())))				   
				ds.add(( aURI, mp.formalizes, b['c']))
				ds.add(( b['c'], mp.formalizedAs, aURI))
				
				a.add(( URIRef(claimObj.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/OBI_0000427")))
				a.add(( URIRef(claimObj.strip('\n')), RDFS.label, Literal(eLabel.lower())))
				a.add(( URIRef(claimObj.strip('\n')), URIRef("http://purl.obolibrary.org/obo/DIDEO_00000096"), URIRef(claimSub.strip('\n'))))

				a.add(( URIRef(claimSub.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431")))
				a.add(( URIRef(claimSub.strip('\n')), RDFS.label, Literal(dLabel.lower())))
				
			elif(predicateType == "inhibits"):

				aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s" % assertionCount)
				assertionCount += 1
				
				dLabel = cL[claimSub.strip('\n')].strip('\n')
				eLabel = pL[claimObj.strip('\n')].strip('\n')
				assertionLabel = dLabel + " inhibits " + eLabel
				
				a = ds.add_graph((aURI))
				ds.add(( aURI, RDF.type, np.assertion))
				ds.add(( aURI, RDFS.label, Literal(assertionLabel.lower())))
				ds.add(( aURI, mp.formalizes, b['c']))
				ds.add(( b['c'], mp.formalizedAs, aURI))
				
				a.add(( URIRef(claimSub.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431")))
				a.add(( URIRef(claimSub.strip('\n')), RDFS.label, Literal(dLabel.lower())))
				a.add(( URIRef(claimSub.strip('\n')), URIRef("http://purl.obolibrary.org/obo/RO_0002449"), URIRef(claimObj.strip('\n'))))

	print ds.serialize(format='trig')
Example 15
class DatasetTestCase(unittest.TestCase):
    store = "default"
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" %
                           self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(prefix="test",
                                      dir="/tmp",
                                      suffix=".sqlite")
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef("urn:michel")
        self.tarek = URIRef("urn:tarek")
        self.bob = URIRef("urn:bob")
        self.likes = URIRef("urn:likes")
        self.hates = URIRef("urn:hates")
        self.pizza = URIRef("urn:pizza")
        self.cheese = URIRef("urn:cheese")

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef("urn:context-1")
        self.c2 = URIRef("urn:context-2")

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            pass
        else:
            if os.path.isdir(self.tmppath):
                shutil.rmtree(self.tmppath)
            else:
                os.remove(self.tmppath)

    def testGraphAware(self):

        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(
                set(x.identifier for x in self.graph.contexts()),
                set([self.c1, DATASET_DEFAULT_GRAPH_ID]),
            )

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(
            set(x.identifier for x in self.graph.contexts()),
            set([self.c1, DATASET_DEFAULT_GRAPH_ID]),
        )

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(
                set(x.identifier for x in self.graph.contexts()),
                set([self.c1, DATASET_DEFAULT_GRAPH_ID]),
            )

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(
            set(x.identifier for x in self.graph.contexts()),
            set([DATASET_DEFAULT_GRAPH_ID]),
        )

    def testDefaultGraph(self):
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(
            set(x.identifier for x in self.graph.contexts()),
            set([DATASET_DEFAULT_GRAPH_ID]),
        )

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(
            set(x.identifier for x in self.graph.contexts()),
            set([DATASET_DEFAULT_GRAPH_ID]),
        )

    def testNotUnion(self):
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])

    def testIter(self):
        """PR 1382: adds __iter__ to Dataset"""
        d = Dataset()
        uri_a = URIRef("https://example.com/a")
        uri_b = URIRef("https://example.com/b")
        uri_c = URIRef("https://example.com/c")
        uri_d = URIRef("https://example.com/d")

        d.add_graph(URIRef("https://example.com/g1"))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")
               ))  # pointless addition: duplicates above

        d.add_graph(URIRef("https://example.com/g2"))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g2")))
        d.add((uri_a, uri_b, uri_d,
               URIRef("https://example.com/g1")))  # new, uri_d

        # traditional iterator
        i_trad = 0
        for t in d.quads((None, None, None)):
            i_trad += 1

        # new Dataset.__iter__ iterator
        i_new = 0
        for t in d:
            i_new += 1

        self.assertEqual(i_new, i_trad)  # both should be 3
Example 16
    'icpc2p': 'http://purl.bioontology.org/ontology/ICPC2P/'
}

if __name__ == '__main__':
    df = pd.read_excel('data/neurodkg_triples.xlsx', sheet_name='Clean')
    dataset = Dataset()
    for index, row in df.iterrows():
        subj = row['Subject'].replace(' ', '')
        pred = row['Predicate']
        obj = row['Object']
        obj_id = row['Concept ID']
        # if the predicate is targetGroup: remove the characters (\ |\>|\<|\-|\_|\;|\:) from the object names
        if pred == 'targetGroup':
            obj = re.sub('\ |\>|\<|\-|\_|\;|\:', '', obj)
            # create triples containing subject (neurodkg instances), predicate (several are defined above), and object (neurodkg instances) and add them to the dataset
            dataset.add((NEURO_INST[subj], URIRef(predicate_to_uri[pred]),
                         NEURO_INST[obj]))
        # object id: differentiating between the cases of having a disease ID or not
        elif str(obj_id) != 'nan':
            print(obj_id)
            curie = obj_id.replace(' ', '').split(':')
            if len(curie) <= 1:
                print(obj_id)
            prefix = curie[0].lower()
            obj_id = curie[1]
            print(curie)
            # if a disease ID was found, then add the ID and ontology as object of the triple
            #obj_uri  = BASE[prefix+':'+obj_id]
            obj_uri = URIRef(prefix_dict[prefix] + obj_id)
            dataset.add(
                (NEURO_INST[subj], URIRef(predicate_to_uri[pred]), obj_uri))
            # if there was no disease ID in an ontology: use the disease label as object of the triple
Example 17
def result_to_dataset(result):
    ds = Dataset()
    for q in result.bindings:
        ds.add((q[Variable('s')], q[Variable('p')], q[Variable('o')],
                q[Variable('g')]))
    return ds
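
A minimal usage sketch for result_to_dataset (hypothetical data; it assumes the SELECT query binds exactly the variables s, p, o and g, as the function requires):

from rdflib import Dataset, URIRef

src = Dataset()
src.add((URIRef("urn:s"), URIRef("urn:p"), URIRef("urn:o"), URIRef("urn:g")))
result = src.query("SELECT ?s ?p ?o ?g WHERE { GRAPH ?g { ?s ?p ?o } }")
ds = result_to_dataset(result)
for quad in ds.quads((None, None, None, None)):
    print(quad)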
Example 18
def data_structure_definition(profile, dataset_name, dataset_base_uri, variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a nanopublication with provenance annotations)
    that contains the data structure definition (from the DataCube vocabulary) and
    the mappings to external datasets.

    Arguments:
    dataset_name -- the name of the dataset
    variables   -- the list of dictionaries with the variables and their mappings to URIs
    profile     -- the Google signin profile
    source_path -- the path to the dataset file that was annotated
    source_hash -- the Git hash of the dataset file version of the dataset

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace("{}/".format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a conjunctive graph for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind("qbrv", QBRV)
    rdf_dataset.bind("qbr", QBR)
    rdf_dataset.bind("qb", QB)
    rdf_dataset.bind("skos", SKOS)
    rdf_dataset.bind("prov", PROV)
    rdf_dataset.bind("np", NP)
    rdf_dataset.bind("foaf", FOAF)

    # Initialize the graphs needed for the nanopublication
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + "/" + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE["assertion/" + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE["provenance/" + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE["pubinfo/" + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR["person/" + profile["email"]]

    rdf_dataset.add((author_uri, RDF.type, FOAF["Person"]))
    rdf_dataset.add((author_uri, FOAF["name"], Literal(profile["name"])))
    rdf_dataset.add((author_uri, FOAF["email"], Literal(profile["email"])))
    rdf_dataset.add((author_uri, QBRV["googleId"], Literal(profile["id"])))
    try:
        rdf_dataset.add((author_uri, FOAF["depiction"], URIRef(profile["image"])))
    except KeyError:
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV["path"], Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV["sha1_hash"], Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE["nanopublication/" + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP["Nanopublication"]))
    rdf_dataset.add((nanopublication_uri, NP["hasAssertion"], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP["Assertion"]))
    rdf_dataset.add((nanopublication_uri, NP["hasProvenance"], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP["Provenance"]))
    rdf_dataset.add((nanopublication_uri, NP["hasPublicationInfo"], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP["PublicationInfo"]))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure definition itself)
    provenance_graph.add((assertion_graph_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add((dataset_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add((assertion_graph_uri, PROV["generatedAtTime"], Literal(timestamp, datatype=XSD.datetime)))
    provenance_graph.add((assertion_graph_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather than an activity itself.
    qber_uri = URIRef("https://github.com/CLARIAH/qber.git")

    pubinfo_graph.add((nanopublication_uri, PROV["wasGeneratedBy"], qber_uri))
    pubinfo_graph.add((nanopublication_uri, PROV["generatedAtTime"], Literal(timestamp, datatype=XSD.datetime)))
    pubinfo_graph.add((nanopublication_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The assertion graph
    # ----

    structure_uri = BASE["structure"]

    assertion_graph.add((dataset_uri, RDF.type, QB["DataSet"]))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add((structure_uri, RDF.type, QB["DataStructureDefinition"]))

    assertion_graph.add((dataset_uri, QB["structure"], structure_uri))

    for variable_id, variable in variables.items():
        variable_uri = URIRef(variable["original"]["uri"])
        variable_label = Literal(variable["original"]["label"])
        variable_type = URIRef(variable["type"])

        codelist_uri = URIRef(variable["codelist"]["original"]["uri"])
        codelist_label = Literal(variable["codelist"]["original"]["label"])

        # The variable as component of the definition
        component_uri = safe_url(BASE, "component/" + variable["original"]["label"])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB["component"], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if "description" in variable and variable["description"] != "":
            assertion_graph.add((variable_uri, RDFS.comment, Literal(variable["description"])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable["uri"] != str(variable_uri):
            assertion_graph.add((variable_uri, RDFS["subPropertyOf"], URIRef(variable["uri"])))

        if variable_type == QB["DimensionProperty"]:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["dimension"], variable_uri))

            # Coded variables are also of type coded property (a subproperty of dimension property)
            if variable["category"] == "coded":
                assertion_graph.add((variable_uri, RDF.type, QB["CodedProperty"]))

        elif variable_type == QB["MeasureProperty"]:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["measure"], variable_uri))
        elif variable_type == QB["AttributeProperty"]:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["attribute"], variable_uri))

        # If this variable is of category 'coded', we add the codelist and URIs for
        # each of its values (including mappings between value URIs, etc.)
        if variable["category"] == "coded":
            assertion_graph.add((codelist_uri, RDF.type, SKOS["Collection"]))
            assertion_graph.add((codelist_uri, RDFS.label, Literal(codelist_label)))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB["codeList"], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if variable["codelist"]["uri"] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV["wasDerivedFrom"], URIRef(variable["codelist"]["uri"])))

            # Generate a SKOS concept for each of the values and map it to the
            # assigned codelist
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], Literal(value_label)))
                assertion_graph.add((codelist_uri, SKOS["member"], value_uri))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "identifier":
            # Generate a SKOS concept for each of the values
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], value_label))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "other":
            # Generate a literal for each of the values when converting the dataset (but not here)
            pass

    return rdf_dataset
Example 19
# subprocess.Popen(cmd_string, shell=True, stdin=None,
# stdout=None, stderr=None, close_fds=True)
# time.sleep(1)
# container_id = self.get_containerID(image_name)
# print 'CID:' + container_id
# print 'commit'


# Generate URI's for docker instance
dockerEntityuuid = str(uuid.uuid4())
dockerActivityuuid = str(uuid.uuid4())
### WARNING WARNING this needs to be generated and stored in the config.
dockerUseruuid = str(uuid.uuid4())

# Add entity triples first
ds.add((UUIDNS[dockerEntityuuid], RDF.type, PROV.Entity))
ds.add((UUIDNS[dockerEntityuuid], RDF.type, DOCKER.Entity))
ds.add((UUIDNS[dockerEntityuuid], PROV.wasGeneratedBy,
        UUIDNS[dockerActivityuuid]))
ds.add((UUIDNS[dockerEntityuuid], DOCKER.hasImageID, Literal("ImageIDString")))

# Add activity triples next including computational activity.
ds.add((UUIDNS[dockerActivityuuid], RDF.type, PROV.Activity))
ds.add((UUIDNS[dockerActivityuuid], RDF.type, CA.compuatationalActivity))
# Need a subclass for Docker-related activities
ds.add((UUIDNS[dockerActivityuuid], RDF.type, DOCKER.commitActivity))
ds.add((UUIDNS[dockerActivityuuid], DOCKER.hasCommand, DOCKER.commitOperation))
ds.add((UUIDNS[dockerActivityuuid], DOCKER.hasContainerID, Literal("blahID")))
ds.add((UUIDNS[dockerActivityuuid],
        DOCKER.hasContainerTag, Literal("GreatContainer")))
ds.add((UUIDNS[dockerActivityuuid],
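
The provenance fragment above (cut off mid-statement by the listing) relies on namespace objects, UUIDNS, DOCKER, PROV and CA, that are defined elsewhere in the original module. A minimal sketch of plausible declarations follows; the PROV URI is the standard W3C one, while the UUIDNS, DOCKER and CA URIs are placeholders, not the project's real namespaces.

from rdflib import Dataset, Namespace

PROV = Namespace("http://www.w3.org/ns/prov#")        # W3C PROV ontology
UUIDNS = Namespace("urn:uuid:")                        # assumption
DOCKER = Namespace("http://example.org/docker#")       # placeholder URI
CA = Namespace("http://example.org/computational#")    # placeholder URI

ds = Dataset()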
Esempio n. 20
0
                websiteBool = False
        else:
            if 'http' in row['straat']:
                website = Literal(row['straat'],
                                  datatype=XSD['string'])
            elif 'http' in row['internet']:
                website = Literal(row['internet'],
                                  datatype=XSD['string'])
            elif 'http' in row['plaats_2']:
                website = Literal(row['plaats_2'],
                                  datatype=XSD['string'])
            else:
                websiteBool = False
    else:
        website = Literal(row['Website'], datatype=XSD['string'])
dataset.add((thing, RDFS['label'], name))
dataset.add((thing, DBP['latitude'], lat))
dataset.add((thing, DBP['longitude'], lng))
dataset.add((thing, WGS['lat'], latw))
dataset.add((thing, WGS['long'], lngw))
dataset.add((thing, GEO['hasGeometry'], thinggeo))
dataset.add((thinggeo, RDF['type'], GEO['Geometry']))
dataset.add((thinggeo, GEO['asWKT'], point))
dataset.add((thing, SCHEMA['postalCode'], pcode))
dataset.add((thing, VCARD['hasPostalCode'], pcode))
dataset.add((thing, DBO['PostalCode'], pcode))
dataset.add((thing, SCHEMA['PostalAddress'], addr))
dataset.add((thing, VCARD['hasAddress'], addr))
dataset.add((thing, DBO['address'], addr))
# possibly for BAG
if short[i] != "toe":
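
The construction of the `point` geometry added via GEO['asWKT'] above is not part of this fragment. A sketch of how such a literal is typically built with the GeoSPARQL vocabulary; the coordinates are placeholders.

from rdflib import Literal, Namespace

GEO = Namespace("http://www.opengis.net/ont/geosparql#")
# A WKT point literal typed as geo:wktLiteral (longitude latitude order).
point = Literal("POINT(4.8952 52.3702)", datatype=GEO['wktLiteral'])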
Esempio n. 21
0
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" %
                           self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(prefix='test',
                                      dir='/tmp',
                                      suffix='.sqlite')
        elif self.store == "SPARQLUpdateStore":
            root = "http://localhost:3030/ukpp/"
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'urn:michel')
        self.tarek = URIRef(u'urn:tarek')
        self.bob = URIRef(u'urn:bob')
        self.likes = URIRef(u'urn:likes')
        self.hates = URIRef(u'urn:hates')
        self.pizza = URIRef(u'urn:pizza')
        self.cheese = URIRef(u'urn:cheese')

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef(u'urn:context-1')
        self.c2 = URIRef(u'urn:context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            pass
        else:
            if os.path.isdir(self.tmppath):
                shutil.rmtree(self.tmppath)
            else:
                os.remove(self.tmppath)

    def testGraphAware(self):

        if not self.graph.store.graph_aware: return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                              set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEquals(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEquals(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEquals(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                              set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print "Please make sure updating the default graph " \
                  "is supported by your SPARQL endpoint"

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEquals(len(self.graph), 1)
        # only default exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEquals(len(self.graph), 0)
        # default still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print "Please make sure your SPARQL endpoint has not configured " \
                  "its default graph as the union of the named graphs"
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
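
The `store` class attribute is the hook that lets the same test case run against different backends; a sketch of backend-specific subclasses (the class names here are hypothetical, the store identifiers match those checked in setUp):

class SQLiteDatasetTestCase(DatasetTestCase):
    store = "SQLite"

class SPARQLUpdateStoreDatasetTestCase(DatasetTestCase):
    store = "SPARQLUpdateStore"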
Esempio n. 22
0
# Create an empty Dataset
d = Dataset()
# Add a namespace prefix to it, just like for Graph
d.bind("ex", Namespace("http://example.com/"))

# Declare a Graph URI to be used to identify a Graph
graph_1 = URIRef("http://example.com/graph-1")

# Add an empty Graph, identified by graph_1, to the Dataset
d.graph(identifier=graph_1)

# Add two quads to Graph graph_1 in the Dataset
d.add((
    URIRef("http://example.com/subject-x"),
    URIRef("http://example.com/predicate-x"),
    Literal("Triple X"),
    graph_1
))
d.add((
    URIRef("http://example.com/subject-z"),
    URIRef("http://example.com/predicate-z"),
    Literal("Triple Z"),
    graph_1
))

# Add another quad to the Dataset to a non-existent Graph:
# the Graph is created automatically
d.add((
    URIRef("http://example.com/subject-y"),
    URIRef("http://example.com/predicate-y"),
    Literal("Triple Y"),
Esempio n. 23
0

# The Nanopublication consists of three graphs
assertion_graph_uri = BASE['assertion/' + hash_part]
assertion_graph = rdf_dataset.graph(assertion_graph_uri)

provenance_graph_uri = BASE['provenance/' + hash_part]
provenance_graph = rdf_dataset.graph(provenance_graph_uri)

pubinfo_graph_uri = BASE['pubinfo/' + hash_part]
pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)
    

# A URI that represents the author
author_uri = QBR['person/[email protected]']
rdf_dataset.add((author_uri, RDF.type, FOAF['Person']))
rdf_dataset.add((author_uri, FOAF['name'], Literal('Kathrin Dentler')))
rdf_dataset.add((author_uri, FOAF['email'], Literal('*****@*****.**')))
rdf_dataset.add((author_uri, FOAF['depiction'], URIRef('http://www.dentler.org/kathrin.jpg')))
# rdf_dataset.add((author_uri, QBRV['googleId'], Literal(profile['id'])))


# A URI that represents the version of the dataset source file
dataset_version_uri = BASE[source_hash]
    
# Some information about the source file used
rdf_dataset.add((dataset_version_uri, QBRV['path'], Literal(pathtofile, datatype=XSD.string)))
rdf_dataset.add((dataset_version_uri, QBRV['sha1_hash'], Literal(source_hash, datatype=XSD.string)))


# ----
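
The three graphs created above form the body of a nanopublication; they are normally tied together by a fourth 'head' graph using the nanopublication schema. A sketch of that linkage; the NP namespace is the published nanopub schema, but the head-graph and nanopublication URIs below are placeholders, not taken from the original code.

from rdflib import Namespace

NP = Namespace("http://www.nanopub.org/nschema#")

nanopub_uri = BASE['nanopublication/' + hash_part]          # placeholder URI
head_graph = rdf_dataset.graph(BASE['head/' + hash_part])   # placeholder URI

head_graph.add((nanopub_uri, RDF.type, NP['Nanopublication']))
head_graph.add((nanopub_uri, NP['hasAssertion'], assertion_graph_uri))
head_graph.add((nanopub_uri, NP['hasProvenance'], provenance_graph_uri))
head_graph.add((nanopub_uri, NP['hasPublicationInfo'], pubinfo_graph_uri))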
Esempio n. 24
0
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" %
                           self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(prefix='test',
                                      dir='/tmp',
                                      suffix='.sqlite')
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))

    def tearDown(self):
        self.graph.close()
        if os.path.isdir(self.tmppath):
            shutil.rmtree(self.tmppath)
        else:
            os.remove(self.tmppath)

    def testGraphAware(self):
        if not self.graph.store.graph_aware: return

        g = self.graph
        g1 = g.graph(self.c1)

        # added graph exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEquals(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEquals(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEquals(len(g1), 0)

        # graph still exists, although empty
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEquals(len(self.graph), 1)
        # only default exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEquals(len(self.graph), 0)
        # default still exists
        self.assertEquals(set(x.identifier for x in self.graph.contexts()),
                          set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
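
A small standalone sketch of the behaviour testNotUnion checks, using the plain Dataset API: a triple added to a named graph is not visible from the Dataset's (non-union) default graph. The URNs below are placeholders in the spirit of the test fixtures.

from rdflib import Dataset, URIRef

d = Dataset()
c1 = URIRef("urn:context-1")
d.graph(c1).add((URIRef("urn:tarek"), URIRef("urn:likes"), URIRef("urn:pizza")))

print(list(d.objects(URIRef("urn:tarek"), None)))             # [] -- default graph only
print(list(d.graph(c1).objects(URIRef("urn:tarek"), None)))   # [URIRef('urn:pizza')]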