def datatypes():
    """Build a document with one entity exercising many attribute value types.

    Returns:
        A ``ProvDocument`` holding the entity ``ex:e1``, whose attributes
        cover an int, a float, a large int, a bool, plain and accented
        strings, a ``datetime``, a ``Literal`` built with the PROV
        ``InternationalizedString`` type and ``"en"``, and a multi-line
        string.
    """
    document = ProvDocument()
    document.add_namespace(Namespace("ex", "http://example.org/"))

    typed_values = {
        "ex:int": 100,
        "ex:float": 100.123456,
        "ex:long": 123456789000,
        "ex:bool": True,
        "ex:str": "Some string",
        "ex:unicode": "Some unicode string with accents: Huỳnh Trung Đông",
        "ex:timedate": datetime.datetime(2012, 12, 12, 14, 7, 48),
        "ex:intstr": Literal(
            "PROV Internationalized string",
            PROV["InternationalizedString"],
            "en",
        ),
        # The embedded newlines and the indented middle line are deliberate.
        "ex:multi-line": "Line1\n    Line2\nLine3",
    }
    document.entity("ex:e1", typed_values)
    return document
def prov_db_unknown_prov_typ_example():
    """Return a document with two entities joined by a generic influence.

    ``ex:Entity1`` is recorded as the influencee and ``ex:Entity2`` as the
    influencer.
    """
    influencee_id, influencer_id = "ex:Entity1", "ex:Entity2"

    document = ProvDocument()
    document.add_namespace("ex", "https://example.com")
    for entity_id in (influencee_id, influencer_id):
        document.entity(identifier=entity_id)
    document.influence(influencee=influencee_id, influencer=influencer_id)
    return document
Beispiel #3
0
def test_cmip6_data_citation_url(tmp_path):
    """Check the CMIP6 info URL written to the data citation info file."""
    output_file = str(tmp_path / 'output.nc')
    facets = {
        'attribute:mip_era': 'CMIP6',
        'attribute:activity_id': 'activity',
        'attribute:institution_id': 'institution',
        'attribute:source_id': 'source',
        'attribute:experiment_id': 'experiment',
    }

    # Fake provenance: a single file entity carrying the CMIP6 facets.
    prov_doc = ProvDocument()
    for prefix in ('file', 'attribute'):
        prov_doc.add_namespace(prefix, uri=ESMVALTOOL_URI_PREFIX + prefix)
    prov_doc.entity('file:' + output_file, facets)
    _write_citation_files(output_file, prov_doc)

    # The expected URL query is the facet values joined by dots, in
    # insertion order.
    joined_facets = '.'.join(facets.values())
    expected_lines = [
        "Follow the links below to find more information about CMIP6 data:",
        f"- {CMIP6_URL_STEM}/cmip6?input={joined_facets}",
        '',
    ]
    written = (tmp_path / 'output_data_citation_info.txt').read_text()
    assert written == '\n'.join(expected_lines)
Beispiel #4
0
def test_references(tmp_path, monkeypatch):
    """Check that a references tag is replaced by its bibtex file content."""
    output_file = str(tmp_path / 'output.nc')

    # Fake provenance: one file entity that cites the tag 'test_tag'.
    prov_doc = ProvDocument()
    for prefix in ('file', 'attribute'):
        prov_doc.add_namespace(prefix, uri=ESMVALTOOL_URI_PREFIX + prefix)
    prov_doc.entity(
        'file:' + output_file,
        {
            'attribute:references': 'test_tag',
            'attribute:script_file': 'diagnostics.py'
        },
    )

    # Fake bibtex file for that tag, under a 'references' directory inside
    # the path that DIAGNOSTICS is patched to point at.
    bibtex_dir = tmp_path / 'references'
    bibtex_dir.mkdir()
    monkeypatch.setattr(esmvalcore._citation.DIAGNOSTICS, 'path', tmp_path)
    bibtex_content = "Fake bibtex file content\n"
    (bibtex_dir / 'test_tag.bibtex').write_text(bibtex_content)

    _write_citation_files(output_file, prov_doc)

    written = (tmp_path / 'output_citation.bibtex').read_text()
    assert written == '\n'.join([ESMVALTOOL_PAPER, bibtex_content])
Beispiel #5
0
def datatypes():
    """Return a document whose entity ``ex:e1`` has attributes of assorted types."""
    namespace = Namespace('ex', 'http://example.org/')
    graph = ProvDocument()
    graph.add_namespace(namespace)

    # (attribute name, value) pairs, one per value type being exercised.
    attribute_pairs = [
        ('ex:int', 100),
        ('ex:float', 100.123456),
        ('ex:long', 123456789000),
        ('ex:bool', True),
        ('ex:str', 'Some string'),
        ('ex:unicode', u'Some unicode string with accents: Huỳnh Trung Đông'),
        ('ex:timedate', datetime.datetime(2012, 12, 12, 14, 7, 48)),
        ('ex:intstr', Literal("PROV Internationalized string",
                              PROV["InternationalizedString"], "en")),
        # Three lines; the middle one keeps its four leading spaces.
        ('ex:multi-line', '\n'.join(['Line1', '    Line2', 'Line3'])),
    ]
    graph.entity('ex:e1', dict(attribute_pairs))
    return graph
Beispiel #6
0
    def deriveDependency(self, aDO, aRO, derivedList):
        """Record that every item of ``derivedList`` was derived from ``aRO``.

        A fresh provenance document is built for this derivation, the same
        statements are mirrored into the module-level ``pGlobal`` graph, the
        upstream/downstream pointers of the objects are updated, and the
        credit manager is told to add each derived item's credit to ``aRO``.

        :param aDO: object whose ``id`` identifies the agent the derived
            entities are attributed to
        :param aRO: the deriving object (source of the derivation)
        :param derivedList: iterable of derived objects; may be empty
        :return: the ProvDocument describing only this derivation
        """
        d1 = ProvDocument()  # local provlet for this derivation only
        d1.add_namespace("dt", "http://cs.ncl.ac.uk/dtsim/")
        e1 = d1.entity(DTns + aRO.id)  # deriving
        ag1 = d1.agent(DTns + str(aDO.id))
        for der in derivedList:
            # create provlet
            e2 = d1.entity(DTns + der.id)  # derived
            d1.wasAttributedTo(e2, ag1)
            d1.wasDerivedFrom(e2, e1)

            # aRO is upstream of der, with no intermediate activity
            der.upstream = [(aRO, None)]

            # der is downstream of aRO, with no intermediate activity
            aRO.downstream.append((der, None))

        # update global graph
        global_e1 = pGlobal.entity(DTns + aRO.id)  # deriving
        global_ag1 = pGlobal.agent(DTns + str(aDO.id))
        for der in derivedList:
            global_e2 = pGlobal.entity(DTns + der.id)  # derived
            # Attribute each derived entity inside the loop. Previously the
            # attribution ran once, before the loop, on a stale record left
            # over from the local document (and failed on an empty list).
            pGlobal.wasAttributedTo(global_e2, global_ag1)
            pGlobal.wasDerivedFrom(global_e2, global_e1)

        # trigger credit recomputation
        for der in derivedList:
            # aRO needs its credit updated with der's current total credit
            aCreditManager.addDerivationCredit(aRO, der.currentTotalCredit)

        return d1
Beispiel #7
0
    def test_xsd_qnames(self):
        """Round-trip a document whose entity value is an ``XSDQName``."""
        namespace = Namespace('ex', 'http://www.example.org')
        document = ProvDocument()
        document.add_namespace(namespace)

        # The qualified name ex:a_value, wrapped as an XSD QName, is used as
        # the prov:value of the entity.
        document.entity(
            'ex:e1',
            {'prov:value': XSDQName(namespace['a_value'])},
        )

        self.assertPROVJSONRoundTripEquivalence(document)
Beispiel #8
0
def deletion(graph: ProvDocument, package: CommitModelPackage, action: Deletion) -> ProvDocument:
    """Add the statements describing a deleted file to ``graph``.

    The file and its version are added as entities, the version is marked
    as a specialization of the file, and the version is recorded as
    invalidated by the commit of ``package``.

    Returns the same ``graph`` object that was passed in.
    """
    deleted_file, deleted_version = action
    invalidating_commit = package.commit
    for node in (deleted_file, deleted_version):
        graph.entity(*node)
    graph.specializationOf(deleted_version.id, deleted_file.id)
    graph.wasInvalidatedBy(deleted_version.id, invalidating_commit.id)
    return graph
Beispiel #9
0
def bundles2():
    """Build the ``bundles2`` example: two bundles linked through mentionOf.

    Follows
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/bundles2.provn

    Returns:
        The populated ``ProvDocument``.
    """
    doc = ProvDocument()

    # Prefix declarations (ex, alice, bob).
    for prefix, uri in (
        ("ex", "http://www.example.com/"),
        ('alice', 'http://example.org/alice/'),
        ('bob', 'http://example.org/bob/'),
    ):
        doc.add_namespace(prefix, uri)

    # Document level, once per bundle:
    #   entity(<bundle>, [prov:type='prov:Bundle'])
    #   wasGeneratedBy(<bundle>, -, <time>)
    #   agent(<owner>)
    #   wasAttributedTo(<bundle>, <owner>)
    for bundle_id, generated_at, owner in (
        ('bob:bundle4', '2012-05-24T10:30:00', 'ex:Bob'),
        ('alice:bundle5', '2012-05-25T11:15:00', 'ex:Alice'),
    ):
        doc.entity(bundle_id, {'prov:type': PROV['Bundle']})
        doc.wasGeneratedBy(bundle_id, time=generated_at)
        doc.agent(owner)
        doc.wasAttributedTo(bundle_id, owner)

    # bundle bob:bundle4: version 1 of the report and its generation.
    bob_bundle = doc.bundle('bob:bundle4')
    bob_bundle.entity('ex:report1', {'prov:type': "report", 'ex:version': 1})
    bob_bundle.wasGeneratedBy('ex:report1', time='2012-05-24T10:00:01')

    # bundle alice:bundle5: ex:report1bis mentions ex:report1 as described in
    # bob:bundle4, and version 2 of the report is derived from it.
    alice_bundle = doc.bundle('alice:bundle5')
    alice_bundle.entity('ex:report1bis')
    alice_bundle.mentionOf('ex:report1bis', 'ex:report1', 'bob:bundle4')
    alice_bundle.entity(
        'ex:report2', [('prov:type', "report"), ('ex:version', 2)]
    )
    alice_bundle.wasGeneratedBy('ex:report2', time='2012-05-25T11:00:01')
    alice_bundle.wasDerivedFrom('ex:report2', 'ex:report1bis')

    return doc
Beispiel #10
0
def bundles2():
    """Return the ``bundles2`` example document (bundles with a mentionOf link).

    Source of the example:
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/bundles2.provn
    """
    # Identifiers that are referenced more than once below.
    bob_bundle_id, alice_bundle_id = 'bob:bundle4', 'alice:bundle5'
    bob_id, alice_id = 'ex:Bob', 'ex:Alice'
    report1_id, report1bis_id, report2_id = (
        'ex:report1', 'ex:report1bis', 'ex:report2'
    )

    document = ProvDocument()
    document.add_namespace("ex", "http://www.example.com/")
    document.add_namespace('alice', 'http://example.org/alice/')
    document.add_namespace('bob', 'http://example.org/bob/')

    # Bob's bundle, described at document level.
    document.entity(bob_bundle_id, {'prov:type': PROV['Bundle']})
    document.wasGeneratedBy(bob_bundle_id, time='2012-05-24T10:30:00')
    document.agent(bob_id)
    document.wasAttributedTo(bob_bundle_id, bob_id)

    # Alice's bundle, described at document level.
    document.entity(alice_bundle_id, {'prov:type': PROV['Bundle']})
    document.wasGeneratedBy(alice_bundle_id, time='2012-05-25T11:15:00')
    document.agent(alice_id)
    document.wasAttributedTo(alice_bundle_id, alice_id)

    # Content of bob:bundle4.
    bundle4 = document.bundle(bob_bundle_id)
    bundle4.entity(report1_id, {'prov:type': "report", 'ex:version': 1})
    bundle4.wasGeneratedBy(report1_id, time='2012-05-24T10:00:01')

    # Content of alice:bundle5, which mentions ex:report1 from bob:bundle4.
    bundle5 = document.bundle(alice_bundle_id)
    bundle5.entity(report1bis_id)
    bundle5.mentionOf(report1bis_id, report1_id, bob_bundle_id)
    bundle5.entity(report2_id, [('prov:type', "report"), ('ex:version', 2)])
    bundle5.wasGeneratedBy(report2_id, time='2012-05-25T11:00:01')
    bundle5.wasDerivedFrom(report2_id, report1bis_id)

    return document
def bundles2():
    """Construct the ``bundles2`` example document.

    Two bundles are declared at document level; the second bundle contains
    an entity that mentions an entity of the first. Example taken from
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/bundles2.provn
    """
    graph = ProvDocument()
    graph.add_namespace("ex", "http://www.example.com/")
    graph.add_namespace("alice", "http://example.org/alice/")
    graph.add_namespace("bob", "http://example.org/bob/")

    def declare_bundle(bundle_id, generation_time, agent_id):
        # Document-level description of one bundle: a prov:Bundle entity,
        # its generation time, its agent, and the attribution to that agent.
        graph.entity(bundle_id, {"prov:type": PROV["Bundle"]})
        graph.wasGeneratedBy(bundle_id, time=generation_time)
        graph.agent(agent_id)
        graph.wasAttributedTo(bundle_id, agent_id)

    declare_bundle("bob:bundle4", "2012-05-24T10:30:00", "ex:Bob")
    declare_bundle("alice:bundle5", "2012-05-25T11:15:00", "ex:Alice")

    # bundle bob:bundle4 ... endBundle
    first = graph.bundle("bob:bundle4")
    first.entity("ex:report1", {"prov:type": "report", "ex:version": 1})
    first.wasGeneratedBy("ex:report1", time="2012-05-24T10:00:01")

    # bundle alice:bundle5 ... endBundle
    second = graph.bundle("alice:bundle5")
    second.entity("ex:report1bis")
    second.mentionOf("ex:report1bis", "ex:report1", "bob:bundle4")
    second.entity("ex:report2", [("prov:type", "report"), ("ex:version", 2)])
    second.wasGeneratedBy("ex:report2", time="2012-05-25T11:00:01")
    second.wasDerivedFrom("ex:report2", "ex:report1bis")

    return graph
Beispiel #12
0
def long_literals():
    """Return a document using a very long namespace URI and a very long label."""
    namespace_uri = "http://Lorem.ipsum/dolor/sit/amet/consectetur/adipiscing/elit/Quisque/vel/sollicitudin/felis/nec/venenatis/massa/Aenean/lectus/arcu/sagittis/sit/amet/nisl/nec/varius/eleifend/sem/In/hac/habitasse/platea/dictumst/Aliquam/eget/fermentum/enim/Curabitur/auctor/elit/non/ipsum/interdum/at/orci/aliquam/"
    label = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec pellentesque luctus nulla vel ullamcorper. Donec sit amet ligula sit amet lorem pretium rhoncus vel vel lorem. Sed at consequat metus, eget eleifend massa. Fusce a facilisis turpis. Lorem volutpat.'

    document = ProvDocument()
    document.add_namespace(Namespace('ex', namespace_uri))
    document.entity('ex:e1', {'prov:label': label})
    return document
Beispiel #13
0
def bundles1():
    """Build the ``bundles1`` example: two bundles that both describe ex:report1.

    Follows
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/bundles1.provn

    Returns:
        The populated ``ProvDocument``.
    """
    doc = ProvDocument()

    # Prefix declarations: ex is registered as a Namespace object, alice and
    # bob as (prefix, uri) pairs.
    doc.add_namespace(Namespace("ex", "http://www.example.com/"))
    for prefix, uri in (
        ('alice', 'http://example.org/alice/'),
        ('bob', 'http://example.org/bob/'),
    ):
        doc.add_namespace(prefix, uri)

    # Document level, once per bundle:
    #   entity(<bundle>, [prov:type='prov:Bundle'])
    #   wasGeneratedBy(<bundle>, -, <time>)
    #   agent(<owner>)
    #   wasAttributedTo(<bundle>, <owner>)
    for bundle_id, generated_at, owner in (
        ('bob:bundle1', '2012-05-24T10:30:00', 'ex:Bob'),
        ('alice:bundle2', '2012-05-25T11:15:00', 'ex:Alice'),
    ):
        doc.entity(bundle_id, {'prov:type': PROV['Bundle']})
        doc.wasGeneratedBy(bundle_id, time=generated_at)
        doc.agent(owner)
        doc.wasAttributedTo(bundle_id, owner)

    # bundle bob:bundle1: version 1 of the report and its generation.
    bob_bundle = doc.bundle('bob:bundle1')
    bob_bundle.entity('ex:report1', {'prov:type': "report", 'ex:version': 1})
    bob_bundle.wasGeneratedBy('ex:report1', time='2012-05-24T10:00:01')

    # bundle alice:bundle2: ex:report1 again (no attributes), plus version 2
    # of the report, derived from it.
    alice_bundle = doc.bundle('alice:bundle2')
    alice_bundle.entity('ex:report1')
    alice_bundle.entity('ex:report2', {'prov:type': "report", 'ex:version': 2})
    alice_bundle.wasGeneratedBy('ex:report2', time='2012-05-25T11:00:01')
    alice_bundle.wasDerivedFrom('ex:report2', 'ex:report1')

    return doc
def bundles1():
    """Construct the ``bundles1`` example document.

    Two bundles are declared at document level and then filled: the first
    holds version 1 of a report, the second holds version 2 derived from
    ex:report1. Example taken from
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/bundles1.provn
    """
    graph = ProvDocument()
    example_ns = Namespace("ex", "http://www.example.com/")
    graph.add_namespace(example_ns)
    graph.add_namespace("alice", "http://example.org/alice/")
    graph.add_namespace("bob", "http://example.org/bob/")

    def declare_bundle(bundle_id, generation_time, agent_id):
        # Document-level description of one bundle: a prov:Bundle entity,
        # its generation time, its agent, and the attribution to that agent.
        graph.entity(bundle_id, {"prov:type": PROV["Bundle"]})
        graph.wasGeneratedBy(bundle_id, time=generation_time)
        graph.agent(agent_id)
        graph.wasAttributedTo(bundle_id, agent_id)

    declare_bundle("bob:bundle1", "2012-05-24T10:30:00", "ex:Bob")
    declare_bundle("alice:bundle2", "2012-05-25T11:15:00", "ex:Alice")

    # bundle bob:bundle1 ... endBundle
    first = graph.bundle("bob:bundle1")
    first.entity("ex:report1", {"prov:type": "report", "ex:version": 1})
    first.wasGeneratedBy("ex:report1", time="2012-05-24T10:00:01")

    # bundle alice:bundle2 ... endBundle
    second = graph.bundle("alice:bundle2")
    second.entity("ex:report1")
    second.entity("ex:report2", {"prov:type": "report", "ex:version": 2})
    second.wasGeneratedBy("ex:report2", time="2012-05-25T11:00:01")
    second.wasDerivedFrom("ex:report2", "ex:report1")

    return graph
def job2prov(job):
    """
    Create ProvDocument based on job description

    The job becomes an activity associated with the agent ``cta:consortium``.
    Every parameter whose type mentions ``file`` or ``xs:anyURI`` becomes an
    entity used by the activity; every result becomes an entity generated by
    the activity and derived from each of those input entities.

    :param job: UWS job; reads ``jobname``, ``jobid``, ``start_time``,
        ``end_time`` and ``jdl.content``
    :return: ProvDocument
    """

    # Shape of job.jdl.content as relied on below:
    # job.jdl.content = {
    #     'description': description,
    #     'parameters': parameters,
    #     'results': results,
    #     'executionduration': execdur,
    #     'quote': quote
    # }
    # parameters[pname] = {
    #     'type': p.get('type'),
    #     'required': p.get('required'),
    #     'default': p.get('default'),
    #     'description': list(p)[0].text,
    # }
    # results[r.get('value')] = {
    #     'mediaType': r.get('mediaType'),
    #     'default': r.get('default'),
    #     'description': list(r)[0].text,
    # }

    pdoc = ProvDocument()
    # Declaring namespaces for various prefixes used in the example
    pdoc.add_namespace('prov', 'http://www.w3.org/ns/prov#')
    pdoc.add_namespace('voprov', 'http://www.ivoa.net/ns/voprov#')
    pdoc.add_namespace('cta', 'http://www.cta-observatory.org#')
    pdoc.add_namespace('uwsdata', 'https://voparis-uws-test.obspm.fr/rest/' + job.jobname + '/' + job.jobid + '/')
    pdoc.add_namespace('ctajobs', 'http://www.cta-observatory.org#')
    # Adding an activity
    ctbin = pdoc.activity('ctajobs:' + job.jobname, job.start_time, job.end_time)
    # TODO: add job description, version, url, ...
    # Agent
    pdoc.agent('cta:consortium', other_attributes={'prov:type': "Organization"})
    pdoc.wasAssociatedWith(ctbin, 'cta:consortium')
    # Entities, in and out with relations
    e_in = []
    # .items() works on both Python 2 and 3; .iteritems() exists only on 2.
    for pname, pdict in job.jdl.content['parameters'].items():
        # The type may be None (it comes from p.get('type')); treat that as
        # "not a file-like parameter" instead of failing on the `in` test.
        ptype = pdict.get('type') or ''
        if any(x in ptype for x in ['file', 'xs:anyURI']):
            used_entity = pdoc.entity('uwsdata:parameters/' + pname)
            e_in.append(used_entity)
            # TODO: use publisher_did? add prov attributes, add voprov attributes?
            ctbin.used(used_entity)
    # Only the result names are needed, so iterate the mapping's keys.
    for rname in job.jdl.content['results']:
        generated_entity = pdoc.entity('uwsdata:results/' + rname)
        # TODO: use publisher_did? add prov attributes, add voprov attributes?
        generated_entity.wasGeneratedBy(ctbin)
        for e in e_in:
            generated_entity.wasDerivedFrom(e)
    return pdoc
Beispiel #16
0
def addition(graph: ProvDocument, package: CommitModelPackage, action: Addition) -> ProvDocument:
    """Add the statements describing a newly added file to ``graph``.

    The file and its version are added as entities; both are recorded as
    generated by the commit and attributed to the author of ``package``;
    the version is marked as a specialization of the file.

    Returns the same ``graph`` object that was passed in.
    """
    new_file, new_version = action
    author = package.author
    commit = package.commit
    added = (new_file, new_version)

    for node in added:
        graph.entity(*node)
    for node in added:
        graph.wasGeneratedBy(node.id, commit.id)
    for node in added:
        graph.wasAttributedTo(node.id, author.id)
    graph.specializationOf(new_version.id, new_file.id)
    return graph
Beispiel #17
0
def gen_prov_graph(file_path, option):
    '''
      Generate a prov graph from a form json file.

      :param file_path: path of the json file; it must provide a 'workflow'
          list of [step name, value] pairs and, for each step name, a
          dictionary with 'entity_in', 'entity_out', 'agent' and 'activity'
          entries, each having an 'i_name'
      :param option: "all" adds the (prefixed) dictionary contents as
          attributes to the nodes; any other value adds bare nodes only
      :return: the generated ProvDocument
    '''
    # The context manager closes the file even if reading or parsing fails.
    with open(file_path, "r") as form_file:
        sf_dict = json.load(form_file)

    d1 = ProvDocument()
    d1.add_namespace('subm',
                     'http://www.enes.org/enes_entity/data_submsission')

    # One shared entity that every workflow step both uses and generates.
    global_in_out = d1.entity("subm:" + "form_name_xx")

    print("workflow definition: ", sf_dict['workflow'])
    # Only the step name is needed; the second element of each pair is unused.
    for act_name, _unused in sf_dict['workflow']:

        print("adding entities for workflow_step: ", act_name)
        step = sf_dict[act_name]
        entity_in_dict = step['entity_in']
        entity_out_dict = step['entity_out']
        agent_dict = step['agent']
        activity_dict = step['activity']

        # generate nodes
        in_node = d1.entity("subm:" + entity_in_dict['i_name'])
        out_node = d1.entity("subm:" + entity_out_dict['i_name'])
        agent = d1.agent("subm:" + agent_dict['i_name'])
        activity = d1.activity("subm:" + activity_dict['i_name'])

        # clean up and prefix dictionaries
        entity_in_dict = prefix_dict(entity_in_dict, 'subm')
        entity_out_dict = prefix_dict(entity_out_dict, 'subm')
        agent_dict = prefix_dict(agent_dict, 'subm')
        activity_dict = prefix_dict(activity_dict, 'subm')

        if option == "all":
            in_node.add_attributes(entity_in_dict)
            out_node.add_attributes(entity_out_dict)
            agent.add_attributes(agent_dict)
            activity.add_attributes(activity_dict)

        # connect nodes in graph
        d1.wasGeneratedBy(out_node, activity)
        d1.used(activity, in_node)
        d1.wasAssociatedWith(activity, agent)
        # NOTE(review): this states that the input was derived from the
        # output; the reverse (out_node derived from in_node) looks like the
        # intent. Left unchanged; confirm before altering the graph.
        d1.wasDerivedFrom(in_node, out_node)
        d1.used(activity, global_in_out)
        d1.wasGeneratedBy(global_in_out, activity)

    return d1
Beispiel #18
0
def long_literals():
    """Build a document that stresses handling of very long string values.

    The ``ex`` namespace URI and the ``prov:label`` of entity ``ex:e1`` are
    both several hundred characters long.
    """
    graph = ProvDocument()

    long_namespace = Namespace(
        'ex',
        "http://Lorem.ipsum/dolor/sit/amet/consectetur/adipiscing/elit/Quisque/vel/sollicitudin/felis/nec/venenatis/massa/Aenean/lectus/arcu/sagittis/sit/amet/nisl/nec/varius/eleifend/sem/In/hac/habitasse/platea/dictumst/Aliquam/eget/fermentum/enim/Curabitur/auctor/elit/non/ipsum/interdum/at/orci/aliquam/",
    )
    graph.add_namespace(long_namespace)

    entity_attributes = {}
    entity_attributes['prov:label'] = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec pellentesque luctus nulla vel ullamcorper. Donec sit amet ligula sit amet lorem pretium rhoncus vel vel lorem. Sed at consequat metus, eget eleifend massa. Fusce a facilisis turpis. Lorem volutpat.'
    graph.entity('ex:e1', entity_attributes)

    return graph
def create_document():
    """Create an example provenance document about a news article.

    The document declares an article entity, an agent, a dataset the article
    is derived from, and a writing activity that used the dataset and
    generated the article. The article is attributed to an agent identifier
    carrying a random numeric suffix. The serialized document is logged at
    debug level.

    Returns:
        The populated ``ProvDocument``.
    """
    article_id = 'now:employment-article-v1.html'
    dataset_id = 'govftp:oesm11st.zip'
    writing_id = 'is:writeArticle'

    doc = ProvDocument()

    # PROV identifies things by qualified names of the form prefix:localpart,
    # so each prefix must be declared before it is used.
    doc.add_namespace('now', 'http://www.provbook.org/nownews/')
    doc.add_namespace('nowpeople',
                      'http://www.provbook.org/nownews/people/')
    doc.add_namespace('bk', 'http://www.provbook.org/ns/#')

    # The article entity and the agent.
    article = doc.entity(article_id)
    article.add_attributes({'prov:value': 'Conteudo do HTML'})
    doc.agent('nowpeople:Filipe')

    # Attribute the article to an agent identifier with a random suffix.
    random_suffix = random.randint(1000, 1070000000)
    doc.wasAttributedTo(article, 'nowpeople:Filipe_' + str(random_suffix))

    # Further prefixes needed for the dataset.
    doc.add_namespace('govftp',
                      'ftp://ftp.bls.gov/pub/special.requests/oes/')
    doc.add_namespace('void', 'http://vocab.deri.ie/void#')

    # The article was derived from a dataset at govftp.
    dataset_attributes = {
        'prov:label': 'employment-stats-2011',
        'prov:type': 'void:Dataset'
    }
    doc.entity(dataset_id, dataset_attributes)
    doc.wasDerivedFrom(article_id, dataset_id)

    # The writing activity, its usage of the dataset and the generation of
    # the article.
    doc.add_namespace('is', 'http://www.provbook.org/nownews/is/#')
    doc.activity(writing_id)
    doc.used(writing_id, dataset_id)
    doc.wasGeneratedBy(article_id, writing_id)

    # Log what has been built so far.
    serialized = doc.serialize(indent=2)
    logging.debug(serialized)
    return doc
Beispiel #20
0
def add_resource_creation(graph: ProvDocument, package: ResourceModelPackage) -> ProvDocument:
    """Add the statements describing the creation of a resource to ``graph``.

    The creation activity, the resource, its version and the creator are
    added as nodes. The creation is associated with the creator; resource
    and version are attributed to the creator and generated by the creation;
    the version is marked as a specialization of the resource.

    Returns the same ``graph`` object that was passed in.
    """
    creator, creation, resource, resource_version = package.creation
    created = (resource, resource_version)

    graph.activity(*creation)
    for node in created:
        graph.entity(*node)
    graph.agent(*creator)

    graph.wasAssociatedWith(creation.id, creator.id)
    for node in created:
        graph.wasAttributedTo(node.id, creator.id)
    for node in created:
        graph.wasGeneratedBy(node.id, creation.id)
    graph.specializationOf(resource_version.id, resource.id)
    return graph
Beispiel #21
0
def primer_example_alternate():
    """Build the PROV primer example through the methods of the records.

    Entities, activities and agents are created on the document, and the
    relations between them are then asserted by calling methods on those
    records (e.g. ``compose.used(...)``) rather than on the document.

    Returns:
        The populated ``ProvDocument``.
    """
    g = ProvDocument(namespaces={
        'ex': 'http://example/',
        'dcterms': 'http://purl.org/dc/terms/',
        'foaf': 'http://xmlns.com/foaf/0.1/'
    })

    article = g.entity('ex:article', {'dcterms:title': "Crime rises in cities"})
    articleV1 = g.entity('ex:articleV1')
    articleV2 = g.entity('ex:articleV2')
    dataSet1 = g.entity('ex:dataSet1')
    dataSet2 = g.entity('ex:dataSet2')
    regionList = g.entity('ex:regionList')
    composition = g.entity('ex:composition')
    chart1 = g.entity('ex:chart1')
    chart2 = g.entity('ex:chart2')
    blogEntry = g.entity('ex:blogEntry')

    # Named compile_activity so the builtin compile() is not shadowed.
    compile_activity = g.activity('ex:compile')
    # Declared in the document only; no relation below refers to it.
    g.activity('ex:compile2')
    compose = g.activity('ex:compose')
    correct = g.activity('ex:correct', '2012-03-31T09:21:00', '2012-04-01T15:21:00')
    illustrate = g.activity('ex:illustrate')

    compose.used(dataSet1, attributes={'prov:role': "ex:dataToCompose"})
    compose.used(regionList, attributes={'prov:role': "ex:regionsToAggregateBy"})
    composition.wasGeneratedBy(compose)

    illustrate.used(composition)
    chart1.wasGeneratedBy(illustrate)

    chart1.wasGeneratedBy(compile_activity, '2012-03-02T10:30:00')

    # The mailbox value had been mangled by e-mail obfuscation
    # ("[email protected]"); this is the address used in the PROV primer.
    derek = g.agent('ex:derek', {
        'prov:type': PROV['Person'], 'foaf:givenName': "Derek", 'foaf:mbox': "<mailto:derek@example.org>"
    })
    compose.wasAssociatedWith(derek)
    illustrate.wasAssociatedWith(derek)

    chartgen = g.agent('ex:chartgen', {
        'prov:type': PROV["Organization"], 'foaf:name': "Chart Generators Inc"
    })
    derek.actedOnBehalfOf(chartgen, compose)
    chart1.wasAttributedTo(derek)

    dataSet2.wasGeneratedBy(correct)
    correct.used(dataSet1)
    dataSet2.wasDerivedFrom(dataSet1, attributes={'prov:type': PROV['Revision']})
    chart2.wasDerivedFrom(dataSet2)

    blogEntry.wasDerivedFrom(article, attributes={'prov:type': PROV['Quotation']})
    articleV1.specializationOf(article)
    articleV1.wasDerivedFrom(dataSet1)

    articleV2.specializationOf(article)
    articleV2.wasDerivedFrom(dataSet2)

    articleV2.alternateOf(articleV1)

    return g
Beispiel #22
0
def collections():
    """Return a document in which entity ``ex:e1`` is a member of collection ``ex:c1``."""
    example_ns = Namespace('ex', 'http://example.org/')
    document = ProvDocument()

    # The collection is identified by a qualified name taken from the
    # namespace object; the entity by its prefixed string form.
    collection = document.collection(example_ns['c1'])
    member = document.entity('ex:e1')
    document.hadMember(collection, member)

    return document
Beispiel #23
0
def collections():
    """Build a minimal collection example: ``ex:c1`` with the member ``ex:e1``."""
    doc = ProvDocument()
    ns = Namespace('ex', 'http://example.org/')

    # Arguments are evaluated left to right, so the collection is created
    # before the entity.
    doc.hadMember(doc.collection(ns['c1']), doc.entity('ex:e1'))

    return doc
def collections():
    """Create a document holding one collection and one entity that belongs to it."""
    graph = ProvDocument()
    collection_name = Namespace("ex", "http://example.org/")["c1"]

    members_of = graph.collection(collection_name)
    element = graph.entity("ex:e1")
    graph.hadMember(members_of, element)

    return graph
Beispiel #25
0
def datatypes():
    """Create a document whose entity ``ex:e1`` carries one attribute per value type."""
    doc = ProvDocument()
    doc.add_namespace(Namespace('ex', 'http://example.org/'))

    # Values that need construction are built first, in the same order in
    # which the attribute mapping lists them.
    moment = datetime.datetime(2012, 12, 12, 14, 7, 48)
    international = Literal("PROV Internationalized string", PROV["InternationalizedString"], "en")
    three_lines = "Line1\n    Line2\nLine3"

    entity_attributes = {
        'ex:int': 100,
        'ex:float': 100.123456,
        'ex:long': 123456789000,
        'ex:bool': True,
        'ex:str': 'Some string',
        'ex:unicode': u'Some unicode string with accents: Huỳnh Trung Đông',
        'ex:timedate': moment,
        'ex:intstr': international,
        'ex:multi-line': three_lines,
    }
    doc.entity('ex:e1', entity_attributes)
    return doc
Beispiel #26
0
def release_tag_model(graph: ProvDocument, packages: ReleaseTagPackage):
    """Add release, tag and commit statements for every package to ``graph``.

    For each package, whichever of its release, tag and commit parts are
    present are added as agent / entity / activity nodes with generation,
    attribution and association relations. Membership links are added from
    assets and release evidence to the release, from the tag to the release
    (when both are present), and from the commit to the tag (when both are
    present).

    Returns the same ``graph`` object that was passed in.
    """
    for package in packages:
        has_release = package.release_package is not None
        if has_release:
            (release_user, release, release_event,
             release_evidence, assets) = package.release_package
            graph.agent(*release_user)
            graph.entity(*release)
            graph.activity(*release_event)
            graph.entity(*release_evidence)
            for asset in assets:
                graph.entity(*asset)
                graph.hadMember(asset.id, release.id)

            graph.hadMember(release_evidence.id, release.id)
            graph.wasGeneratedBy(release.id, release_event.id)
            graph.wasAttributedTo(release.id, release_user.id)
            graph.wasAssociatedWith(release_event.id, release_user.id)

        has_tag = package.tag_package is not None
        if has_tag:
            tag_user, tag, tag_event = package.tag_package
            graph.agent(*tag_user)
            graph.entity(*tag)
            graph.activity(*tag_event)

            # `release` is only bound when this package has a release part.
            if has_release:
                graph.hadMember(tag.id, release.id)
            graph.wasGeneratedBy(tag.id, tag_event.id)
            graph.wasAttributedTo(tag.id, tag_user.id)
            graph.wasAssociatedWith(tag_event.id, tag_user.id)

        if package.commit_package is None:
            continue

        # Only three of the five commit package fields are used here.
        author, commit_event, _, commit, _ = package.commit_package
        graph.agent(*author)
        graph.activity(*commit_event)
        graph.entity(*commit)

        # `tag` is only bound when this package has a tag part.
        if has_tag:
            graph.hadMember(commit.id, tag.id)
        graph.wasGeneratedBy(commit.id, commit_event.id)
        graph.wasAttributedTo(commit.id, author.id)
        graph.wasAssociatedWith(commit_event.id, author.id)
    return graph
Beispiel #27
0
def test_cmip6_data_citation(tmp_path, monkeypatch):
    """Test2: CMIP6 citation info is retrieved from ES-DOC."""
    # Fake provenance: a single file entity carrying CMIP6 facets.
    facets = {
        'mip_era': 'CMIP6',
        'activity_id': 'activity',
        'institution_id': 'institution',
        'source_id': 'source',
        'experiment_id': 'experiment',
    }
    attributes = {
        'attribute:' + facet: value
        for facet, value in facets.items()
    }
    provenance = ProvDocument()
    for prefix in ('file', 'attribute'):
        provenance.add_namespace(prefix, uri=ESMVALTOOL_URI_PREFIX + prefix)
    filename = str(tmp_path / 'output.nc')
    provenance.entity('file:' + filename, attributes)

    # Swap in the canned ``_get_response`` before writing the citation files.
    monkeypatch.setattr(esmvalcore._citation, '_get_response',
                        mock_get_response)
    _write_citation_files(filename, provenance)

    # Fake bibtex entry the written file is compared against.
    url = 'url not found'
    title = 'title is found'
    publisher = 'publisher not found'
    year = 'publicationYear not found'
    authors = 'creators not found'
    doi = 'doi not found'
    fake_bibtex_entry = textwrap.dedent(f"""
        @misc{{{url},
        \turl = {{{url}}},
        \ttitle = {{{title}}},
        \tpublisher = {{{publisher}}},
        \tyear = {year},
        \tauthor = {{{authors}}},
        \tdoi = {{{doi}}},
        }}
        """).lstrip()
    expected = '\n'.join([ESMVALTOOL_PAPER, fake_bibtex_entry])

    citation_file = tmp_path / 'output_citation.bibtex'
    assert citation_file.read_text() == expected
Beispiel #28
0
def ctfToProv():
    """Convert the events of the module-level ``trace_collection`` to PROV.

    For every event an entity, a producer agent, a controller agent and an
    activity are declared. The entity is generated by the activity, the
    activity is associated with the producer, each distinct
    controller/producer ``used`` relation is recorded once, and every entity
    after the first is linked to its predecessor.

    Returns the populated ``ProvDocument``.
    """
    document = ProvDocument()
    # Throw-away document: only used to render a relation as a string so that
    # duplicates can be detected without touching ``document``.
    scratch = ProvDocument()
    # Namespaces do not need to be explicitly added to a document.
    ex = Namespace('ex', 'http://example/')

    seen_relationships = []
    previous_entity = None
    for index, event in enumerate(trace_collection.events):
        field_names = event.field_list_with_scope(
            babeltrace.CTFScope.EVENT_FIELDS)
        dataset = {'ex:' + name: event[name] for name in field_names}
        dataset['ex:timestamp'] = event['timestamp'] / 1000000000

        entity = document.entity(ex['event' + str(index)], dataset)
        producer_agent = document.agent('ex:' + event['producer_id'])
        controller_agent = document.agent('ex:' + event['controller_id'])
        activity = document.activity('ex:' + event['activity'] + str(index))
        document.wasGeneratedBy(entity, activity)

        # The association is added for every event, without de-duplication.
        document.wasAssociatedWith(activity, producer_agent)

        # Record the controller -> producer relation only the first time.
        used_key = str(scratch.used(controller_agent, producer_agent))
        if used_key not in seen_relationships:
            document.used(controller_agent, producer_agent)
            seen_relationships.append(used_key)

        # Link this event's entity to the one created just before it.
        if previous_entity is not None:
            document.wasAssociatedWith(previous_entity, entity)
        previous_entity = entity
    return document
Beispiel #29
0
def ctfToProv():
    """Convert the events of the module-level ``trace_collection`` to PROV.

    For every event an entity, a producer agent, a controller agent and an
    activity are declared. The entity is generated by the activity and the
    activity is associated with the producer. Each distinct
    controller/producer pair gets a single ``used`` relation. Every entity is
    additionally linked to the previous entity emitted by the *same*
    producer.

    Returns:
        The populated ``ProvDocument``.
    """
    d1 = ProvDocument()
    # Namespaces do not need to be explicitly added to a document.
    ex = Namespace('ex', 'http://example/')

    # (controller_id, producer_id) pairs that already have a ``used`` record.
    used_pairs = set()
    # producer_id -> entities emitted by that producer, in event order.
    producer_events = {}

    for counter, event in enumerate(trace_collection.events):
        dataset = {
            'ex:' + k: event[k]
            for k in event.field_list_with_scope(
                babeltrace.CTFScope.EVENT_FIELDS)
        }
        # NOTE(review): the divisor suggests a ns -> s conversion; confirm.
        dataset['ex:timestamp'] = event['timestamp'] / 1000000000

        producer_id = event['producer_id']
        controller_id = event['controller_id']

        e1 = d1.entity(ex['event' + str(counter)], dataset)
        producer_agent = d1.agent('ex:' + producer_id)

        # Chain this entity to the producer's previous one. The first entity
        # of a producer has no predecessor but must still be remembered,
        # otherwise the next lookup would index an empty list.
        pel = producer_events.setdefault(producer_id, [])
        if pel:
            d1.wasAssociatedWith(pel[-1], e1)
        pel.append(e1)

        controller_agent = d1.agent('ex:' + controller_id)
        activity = d1.activity('ex:' + event['activity'] + str(counter))
        d1.wasGeneratedBy(e1, activity)

        # The association is added for every event, without de-duplication.
        d1.wasAssociatedWith(activity, producer_agent)

        # Add the controller -> producer relation only once per pair.
        pair = (controller_id, producer_id)
        if pair not in used_pairs:
            d1.used(controller_agent, producer_agent)
            used_pairs.add(pair)
    return d1
    def test_document_update_simple(self):
        """Check ``ProvDocument.update`` with a second, overlapping document.

        Updating with a non-document value must raise ``ProvException``;
        after a valid update the target is expected to hold two records and
        two bundles.
        """
        target = ProvDocument()
        target.set_default_namespace(EX_URI)
        target.entity('e')
        target.bundle('b1').entity('e')

        source = ProvDocument()
        source.set_default_namespace(EX_URI)
        source.entity('e')
        for bundle_id in ('b1', 'b2'):
            source.bundle(bundle_id).entity('e')

        # Anything that is not a document is rejected.
        with self.assertRaises(ProvException):
            target.update(1)

        target.update(source)
        self.assertEqual(len(target.get_records()), 2)
        self.assertEqual(len(target.bundles), 2)
Beispiel #31
0
def transform_to_prov(context_model):
    """Turn the process chain in ``context_model`` into a PROV document.

    ``context_model["process_details"]`` is iterated in order. Each key
    yields an output entity and an activity; the activity uses the previous
    step's output (the input data for the first step), and the output is
    derived from it. The ``timing`` entry of each step supplies the ``end``
    time for the generation and the ``start`` time for the start record.

    As a side effect the document is rendered to ``output-prov.png``.

    Returns the ``ProvDocument``.
    """
    from prov.dot import prov_to_dot
    from prov.model import ProvDocument

    doc = ProvDocument()
    for prefix, uri in (
            ('is', 'http://www.provbook.org/nownews/is/#'),
            ('void', 'http://vocab.deri.ie/void#'),
            ('nowpeople', 'http://www.provbook.org/nownews/people/'),
    ):
        doc.add_namespace(prefix, uri)

    source_data = doc.entity("void:Inputdata")
    backend_agent = doc.agent("nowpeople:EODC")
    user_agent = doc.agent("nowpeople:OpenEO-User")
    doc.wasAttributedTo(source_data, backend_agent)

    # ``upstream`` is whatever the current step consumes.
    upstream = source_data
    for step_name, step in context_model["process_details"].items():
        timing = step["timing"]
        step_output = doc.entity("void:" + step_name + "_output")
        step_activity = doc.activity('is:' + step_name)

        doc.used(step_activity, upstream)
        doc.wasDerivedFrom(step_output, upstream)
        doc.wasGeneratedBy(step_output, step_activity, time=timing["end"])
        doc.wasStartedBy(step_activity, user_agent, time=timing["start"])

        upstream = step_output

    prov_to_dot(doc).write_png('output-prov.png')

    return doc
Beispiel #32
0
    def test_document_update_simple(self):
        """Update one document with another and check the merged counts.

        A non-document argument to ``update`` must raise ``ProvException``.
        After the real update, two records and two bundles are expected.
        """
        first = ProvDocument()
        first.set_default_namespace(EX_URI)
        first.entity('e')
        first_bundle = first.bundle('b1')
        first_bundle.entity('e')

        second = ProvDocument()
        second.set_default_namespace(EX_URI)
        second.entity('e')
        for name in ('b1', 'b2'):
            second_bundle = second.bundle(name)
            second_bundle.entity('e')

        # The argument must be a document; an int is refused.
        with self.assertRaises(ProvException):
            first.update(1)

        first.update(second)
        record_count = len(first.get_records())
        bundle_count = len(first.bundles)
        self.assertEqual(record_count, 2)
        self.assertEqual(bundle_count, 2)
Beispiel #33
0
    def test_xsd_qnames(self):
        """Entity attributes given as ``XSDQName`` stay ``XSDQName`` values.

        One value lives in a namespace registered with the document, the
        other in a namespace that is not; the document must still round-trip.
        """
        document = ProvDocument()
        registered = Namespace('ex', 'http://www.example.org/')
        document.add_namespace(registered)
        # Deliberately not added to the document.
        unregistered = Namespace('ex1', 'http://www.example1.org/')

        attributes = {
            'prov:value': XSDQName(registered['a_value']),
            'prov:type': XSDQName(unregistered['another_value']),
        }
        entity = document.entity('ex:e1', attributes)
        for _name, value in entity.attributes:
            self.assertIsInstance(value, XSDQName)

        self.assertRoundTripEquivalence(document)
Beispiel #34
0
    def generateProvlet(self, aDO, aRO):
        """Create a small PROV document attributing ``aRO`` to ``aDO``.

        The same entity, agent and attribution are also recorded in the
        module-level ``pGlobal`` graph.

        Returns the newly created document.
        """
        entity_id = DTns + aRO.id
        agent_id = DTns + str(aDO.id)

        # Stand-alone provlet for this pair.
        provlet = ProvDocument()
        provlet.add_namespace("dt", "http://cs.ncl.ac.uk/dtsim/")
        provlet.wasAttributedTo(provlet.entity(entity_id),
                                provlet.agent(agent_id))

        # Mirror the same statement in the global graph.
        pGlobal.wasAttributedTo(pGlobal.entity(entity_id),
                                pGlobal.agent(agent_id))

        return provlet
Beispiel #35
0
def get_recipe_provenance(documentation, filename):
    """Create a provenance entity describing a recipe.

    The entity is named ``recipe:<filename>`` and carries the recipe's
    description and the string form of its references. Authors and projects
    from ``documentation`` are attached through the helper functions.
    Missing keys fall back to an empty string or an empty list.
    """
    provenance = ProvDocument()
    create_namespace(provenance, 'recipe')
    create_namespace(provenance, 'attribute')

    description = documentation.get('description', '')
    references = str(documentation.get('references', []))
    entity = provenance.entity(f'recipe:{filename}', {
        'attribute:description': description,
        'attribute:references': references,
    })

    authors = documentation.get('authors', [])
    attribute_to_authors(entity, authors)
    projects = documentation.get('projects', [])
    attribute_to_projects(entity, projects)

    return entity
Beispiel #36
0
def modification(graph: ProvDocument, package: CommitModelPackage, action: Modification) -> ProvDocument:
    """Add the records describing one modified file to ``graph``.

    ``action`` unpacks into the file, its new version and the previous
    versions. The new version is attributed to the package's author,
    generated by the package's commit and marked as a specialization of the
    file. Each previous version is used by the commit, revised by the new
    version and is itself a specialization of the file.

    Returns the same ``graph`` object.
    """
    tracked_file, new_version, older_versions = action
    author, commit = package.author, package.commit

    graph.entity(*tracked_file)
    graph.entity(*new_version)
    graph.wasAttributedTo(new_version.id, author.id)
    graph.wasGeneratedBy(new_version.id, commit.id)
    graph.specializationOf(new_version.id, tracked_file.id)

    for older in older_versions:
        graph.entity(*older)
        graph.used(commit.id, older.id)
        graph.wasRevisionOf(new_version.id, older.id)
        graph.specializationOf(older.id, tracked_file.id)
    return graph
Beispiel #37
0
def add_event_chain(graph: ProvDocument, package: ResourceModelPackage) -> ProvDocument:
    """Add chain of events beginning at the creation event.

    Every link of ``package.event_chain`` contributes its user, event,
    resource and resource version. From the second link onwards the new
    version is additionally tied to the previous link: it is generated by
    the current event, derived from the previous version, and the current
    event uses the previous version and is informed by the previous event.

    Returns the same ``graph`` object.
    """
    # (event, resource_version) of the link handled just before, if any.
    predecessor = None
    for user, event, resource, version in package.event_chain:
        graph.entity(*resource)
        graph.entity(*version)
        graph.activity(*event)
        graph.agent(*user)
        graph.wasAssociatedWith(event.id, user.id)
        graph.wasAttributedTo(version.id, user.id)
        graph.specializationOf(version.id, resource.id)

        if predecessor is not None:
            earlier_event, earlier_version = predecessor
            graph.entity(*earlier_version)
            graph.activity(*earlier_event)
            graph.wasGeneratedBy(version.id, event.id)
            graph.used(event.id, earlier_version.id)
            graph.wasDerivedFrom(version.id, earlier_version.id)
            graph.wasInformedBy(event.id, earlier_event.id)

        predecessor = (event, version)
    return graph
Beispiel #38
0
class TrackedFile(object):
    """File with provenance tracking."""

    def __init__(self, filename, attributes, ancestors=None):
        """Create an instance of a file with provenance tracking.

        ``attributes`` is deep-copied. ``ancestors`` defaults to a new empty
        list. The provenance document, entity and activity start as ``None``.
        """
        self._filename = filename
        self.attributes = copy.deepcopy(attributes)
        self._ancestors = ancestors if ancestors is not None else []

        self.provenance = None
        self.entity = None
        self.activity = None

    def __str__(self):
        """Return summary string."""
        return f"{self.__class__.__name__}: {self.filename}"

    def copy_provenance(self, target=None):
        """Create a copy with identical provenance information.

        Without ``target`` a new ``TrackedFile`` is built. With ``target``
        the provenance is copied onto it, provided its filename matches.

        Raises:
            ValueError: if this file has no provenance yet, or if ``target``
                refers to a different filename.
        """
        if self.provenance is None:
            raise ValueError(f"Provenance of {self} not initialized")

        if target is None:
            duplicate = TrackedFile(self.filename, self.attributes)
        elif target.filename != self.filename:
            raise ValueError(
                "Attempt to copy provenance to incompatible file.")
        else:
            duplicate = target
            duplicate.attributes = copy.deepcopy(self.attributes)

        duplicate.provenance = copy.deepcopy(self.provenance)
        # Re-resolve entity and activity inside the copied document.
        find = duplicate.provenance.get_record
        duplicate.entity = find(self.entity.identifier)[0]
        duplicate.activity = find(self.activity.identifier)[0]
        return duplicate

    @property
    def filename(self):
        """Filename."""
        return self._filename

    def initialize_provenance(self, activity):
        """Initialize the provenance document.

        Note: this also copies the ancestor provenance. Therefore, changes
        made to ancestor provenance after calling this function will not
        propagate into the provenance of this file.

        Raises:
            ValueError: if the provenance was already initialized.
        """
        if self.provenance is not None:
            raise ValueError(f"Provenance of {self} already initialized")
        self.provenance = ProvDocument()
        self._initialize_namespaces()
        self._initialize_activity(activity)
        self._initialize_entity()
        self._initialize_ancestors(activity)

    def _initialize_namespaces(self):
        """Initialize the namespaces."""
        for prefix in ('file', 'attribute', 'preprocessor', 'task'):
            create_namespace(self.provenance, prefix)

    def _initialize_activity(self, activity):
        """Copy the preprocessor task activity."""
        self.activity = activity
        update_without_duplicating(self.provenance, activity.bundle)

    def _initialize_entity(self):
        """Initialize the entity representing the file."""
        # Authors and projects are attached separately below.
        handled_separately = ('authors', 'projects')
        entity_attributes = {}
        for name, value in self.attributes.items():
            if name not in handled_separately:
                entity_attributes['attribute:' + name] = str(value)

        self.entity = self.provenance.entity('file:' + self.filename,
                                             entity_attributes)
        attribute_to_authors(self.entity, self.attributes.get('authors', []))
        attribute_to_projects(self.entity, self.attributes.get('projects', []))

    def _initialize_ancestors(self, activity):
        """Register ancestor files for provenance tracking."""
        for parent in self._ancestors:
            # Ancestors without provenance are initialized with the same
            # activity before being merged in.
            if parent.provenance is None:
                parent.initialize_provenance(activity)
            update_without_duplicating(self.provenance, parent.provenance)
            self.wasderivedfrom(parent)

    def wasderivedfrom(self, other):
        """Let the file know that it was derived from other.

        ``other`` may be a ``TrackedFile`` (its entity is used) or an entity.

        Raises:
            ValueError: if this file's activity is not set.
        """
        source = other.entity if isinstance(other, TrackedFile) else other
        update_without_duplicating(self.provenance, source.bundle)
        if not self.activity:
            raise ValueError("Activity not initialized.")
        self.entity.wasDerivedFrom(source, self.activity)

    def _select_for_include(self):
        """Return the metadata to embed: provenance XML, software, caption."""
        selected = {
            'provenance': self.provenance.serialize(format='xml'),
            'software': "Created with ESMValTool v{}".format(__version__),
        }
        if 'caption' in self.attributes:
            selected['caption'] = self.attributes['caption']
        return selected

    @staticmethod
    def _include_provenance_nc(filename, attributes):
        """Set every attribute on the dataset opened in append mode."""
        with Dataset(filename, 'a') as dataset:
            for name, value in attributes.items():
                setattr(dataset, name, value)

    @staticmethod
    def _include_provenance_png(filename, attributes):
        """Re-save the image with the attributes added as compressed text."""
        # Keys without a mapping are written under their own name.
        exif_tags = {
            'provenance': 'ImageHistory',
            'caption': 'ImageDescription',
            'software': 'Software',
        }
        pnginfo = PngInfo()
        for name, value in attributes.items():
            tag = exif_tags.get(name, name)
            pnginfo.add_text(tag, value, zip=True)
        with Image.open(filename) as image:
            image.save(filename, pnginfo=pnginfo)

    def _include_provenance(self):
        """Include provenance information as metadata."""
        attributes = self._select_for_include()

        # The file itself, plus its plot file when one is registered.
        targets = [self.filename]
        if 'plot_file' in self.attributes:
            targets.append(self.attributes['plot_file'])

        # Dispatch on the lower-cased extension; unsupported types are
        # skipped.
        for path in targets:
            extension = os.path.splitext(path)[1].lstrip('.').lower()
            writer = getattr(self, '_include_provenance_' + extension, None)
            if writer:
                writer(path, attributes)

    def save_provenance(self):
        """Export provenance information."""
        self._include_provenance()
        base = os.path.splitext(self.filename)[0] + '_provenance'
        self.provenance.serialize(base + '.xml', format='xml')
        # Only plot provenance if there are not too many records.
        if len(self.provenance.records) <= 100:
            prov_to_dot(self.provenance).write_svg(base + '.svg')
        else:
            logger.debug("Not plotting large provenance tree of %s",
                         self.filename)
def example():
    """Build a sample PROV document for an ADAMA microservice response.

    The document describes people and organizations, the ADAMA platform and
    one microservice, two data sources and a query activity that generates
    the response. It is printed as PROV-N, printed in the default
    serialization and written to ``Sources.png``.
    """
    doc = ProvDocument()
    # Local namespace; it does not exist yet, so it is created here.
    aip = Namespace('aip', 'https://araport.org/provenance/')
    # Dublin Core
    doc.add_namespace("dcterms", "http://purl.org/dc/terms/")
    # FOAF
    doc.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    def person(local_name, given_name, mbox=None):
        """Declare a prov:Person agent, with a mailbox when one is given."""
        attrs = {'prov:type': PROV["Person"], 'foaf:givenName': given_name}
        if mbox is not None:
            attrs['foaf:mbox'] = mbox
        return doc.agent(aip[local_name], attrs)

    def organization(local_name, given_name, identifier=None):
        """Declare a prov:Organization agent, optionally with an identifier."""
        attrs = {
            'prov:type': PROV["Organization"],
            'foaf:givenName': given_name,
        }
        if identifier is not None:
            attrs['dcterms:identifier'] = identifier
        return doc.agent(aip[local_name], attrs)

    # Sponsors and contributors as agents (hard coded for now).
    matthew = person('matthew_vaughn', "Matthew Vaughn",
                     "<mailto:[email protected]>")
    walter = person('walter_moreira', "Walter Moreira",
                    "<mailto:[email protected]>")
    utexas = organization('university_of_texas',
                          "University of Texas at Austin")

    # Delegation to the host university.
    doc.actedOnBehalfOf(walter, utexas)
    doc.actedOnBehalfOf(matthew, utexas)

    # The ADAMA platform as an agent; all attribute values are hard coded.
    adama_platform = doc.agent(aip['adama_platform'], {
        'dcterms:title': "ADAMA",
        'dcterms:description': "Araport Data and Microservices API",
        'dcterms:language': "en-US",
        'dcterms:identifier': "https://api.araport.org/community/v0.3/",
        'dcterms:updated': "2015-04-17T09:44:56",
    })
    doc.wasGeneratedBy(adama_platform, walter)

    # The microservice as an agent. Its name combines namespace, service
    # name and version so that it is unique.
    microservice_name = 'mwvaughn/bar_annotation_v1.0.0'
    adama_microservice = doc.agent(aip[microservice_name], {
        'dcterms:title': "BAR Annotation Service",
        'dcterms:description': "Returns annotation from locus ID",
        'dcterms:language': "en-US",
        'dcterms:identifier': "https://api.araport.org/community/v0.3/mwvaughn/bar_annotation_v1.0.0",
        'dcterms:source': "https://github.com/Arabidopsis-Information-Portal/prov-enabled-api-sample",
    })

    # Timestamps are "now"; ideally the service's update date would be used.
    doc.wasGeneratedBy(adama_microservice, matthew, datetime.datetime.now())
    doc.used(adama_microservice, adama_platform, datetime.datetime.now())

    # Source 1: BAR, with its agents.
    nick = person('nicholas_provart', "Nicholas Provart", "*****@*****.**")
    utoronto = organization('university_of_toronto', "University of Toronto",
                            "http://www.utoronto.ca/")
    doc.actedOnBehalfOf(nick, utoronto)

    datasource1 = doc.entity(aip['datasource1'], {
        'dcterms:title': "BAR Arabidopsis AGI -> Annotation",
        'dcterms:description': "Most recent annotation for given AGI",
        'dcterms:language': "en-US",
        'dcterms:identifier': "http://bar.utoronto.ca/webservices/agiToAnnot.php",
        'dcterms:updated': "2015-04-17T09:44:56",
        'dcterms:license': "Creative Commons 3.0",
    })
    doc.wasAttributedTo(datasource1, nick)

    # Source 2: TAIR, with its agents.
    eva = person('eva_huala', "Eva Huala")
    phoenix = organization('phoenix_bioinformatics', "Phoenix Bioinformatics")
    doc.actedOnBehalfOf(eva, phoenix)

    datasource2 = doc.entity(aip['datasource2'], {
        'dcterms:title': "TAIR",
        'dcterms:description': "The Arabidopsis Information Resource",
        'dcterms:language': "en-US",
        'dcterms:identifier': "https://www.arabidopsis.org/",
        'dcterms:citation': "The Arabidopsis Information Resource (TAIR): improved gene annotation and new tools. Nucleic Acids Research 2011 doi: 10.1093/nar/gkr1090",
    })
    doc.wasAttributedTo(datasource2, eva)

    # The two sources are nested; "derived from" is used for simplicity.
    doc.wasDerivedFrom(aip['datasource1'], aip['datasource2'])

    # The activity performed by the microservice.
    doc.activity(aip['do_query'], datetime.datetime.now())

    # The current response is the subject of the PROV record.
    response = doc.entity(aip['adama_response'])

    # The response is generated by the query, which used the microservice,
    # which in turn used datasource1. Each relation is time-stamped.
    doc.wasGeneratedBy(response, aip['do_query'], datetime.datetime.now())
    doc.used(aip['do_query'], adama_microservice, datetime.datetime.now())
    doc.used(adama_microservice, datasource1, datetime.datetime.now())

    # PROV-N first, then the default serialization.
    print(doc.get_provn())
    print(doc.serialize())
    # Write out as a picture.
    picture = prov.dot.prov_to_dot(doc)
    picture.write_png('Sources.png')
Beispiel #40
0
def toW3Cprov(ling,bundl,format='w3c-prov-xml'):
    """Build a W3C PROV document from workflow-run and lineage traces.

    Args:
        ling: iterable of lineage trace dicts. The keys read here are
            ``runId``, ``username``, ``iterationId``, ``instanceId``,
            ``name``, ``startTime``, ``endTime``, ``parameters``,
            ``derivationIds``, ``streams`` and optionally ``creator``.
        bundl: iterable of trace dicts. Entries whose ``type`` is
            ``'workflow_run'`` open a bundle named after their ``_id``.
            These dicts are modified in place: ``runId`` is added and a
            non-list ``input`` is wrapped in a list.
        format: accepted for interface compatibility; not used here.

    Returns:
        The populated ``ProvDocument``.
    """
    g = ProvDocument()
    # Namespaces do not need to be explicitly added to a document.
    vc = Namespace("knmi", "http://knmi.nl")
    g.add_namespace("dcterms", "http://purl.org/dc/terms/")

    # --- specify bundle ---
    bundle = None
    for trace in bundl:
        # Specifying the user.
        g.agent(vc[trace["username"]],
                other_attributes={"dcterms:author": trace["username"]})

        if trace['type'] == 'workflow_run':
            trace.update({'runId': trace['_id']})
            bundle = g.bundle(vc[trace["runId"]])
            bundle.actedOnBehalfOf(vc[trace["runId"]], vc[trace["username"]])

            # Every field except "input" becomes an attribute of the run
            # entity. Keys that already contain a prefix are used verbatim.
            dic = {}
            for key in trace:
                if key != "input":
                    if ':' in key:
                        dic[key] = trace[key]
                    else:
                        dic[vc[key]] = trace[key]

            dic.update({'prov:type': PROV['Bundle']})
            g.entity(vc[trace["runId"]], dic)

            # NOTE(review): ``dic`` is shared by all inputs, so attributes of
            # earlier inputs carry over to later ones. Confirm this is
            # intended.
            dic = {}
            i = 0
            if not isinstance(trace['input'], list):
                trace['input'] = [trace['input']]
            for y in trace['input']:
                for key in y:
                    if ':' in key:
                        dic[key] = y[key]
                    else:
                        dic[vc[key]] = y[key]
                # The misspelled type value is kept unchanged (sic).
                dic.update({'prov:type': 'worklfow_input'})
                input_id = vc[trace["_id"] + "_" + str(i)]
                bundle.entity(input_id, dic)
                bundle.used(vc[trace["_id"]], input_id,
                            identifier=vc["used_" + trace["_id"] + "_" + str(i)])
                i = i + 1

    # --- specify lineage ---
    for trace in ling:
        process_id = vc["process_" + trace["iterationId"]]

        # Best effort: when the bundle cannot be created the previously
        # selected bundle stays in use.
        try:
            bundle = g.bundle(vc[trace["runId"]])
            bundle.wasAttributedTo(vc[trace["runId"]],
                                   vc["ag_" + trace["username"]],
                                   identifier=vc["attr_" + trace["runId"]])
        except Exception:
            pass

        # Specifying the creator of the activity.
        if 'creator' in trace:
            creator_id = vc["ag_" + trace["creator"]]
            bundle.agent(creator_id,
                         other_attributes={"dcterms:creator": trace["creator"]})
            # The activity is referenced through the namespace, matching the
            # identifier it is declared with below.
            bundle.wasAssociatedWith(process_id, creator_id)
            bundle.wasAttributedTo(vc[trace["runId"]], creator_id)

        # Activity information for the lineage: all non-list fields.
        dic = {}
        for key in trace:
            if not isinstance(trace[key], list):
                if ':' in key:
                    dic[key] = trace[key]
                elif key == 'location':
                    dic["prov:location"] = trace[key]
                else:
                    dic[vc[key]] = trace[key]
        # ``dict.update`` returns None, so the type is set first and the
        # dict itself is passed as the attributes.
        dic.update({'prov:type': trace["name"]})
        bundle.activity(process_id, trace["startTime"], trace["endTime"], dic)

        # Parameters are added as one input entity.
        dic = {}
        for x in trace["parameters"]:
            if ':' in x["key"]:
                dic[x["key"]] = x["val"]
            else:
                dic[vc[x["key"]]] = x["val"]

        dic.update({'prov:type': 'parameters'})
        parameters_id = vc["parameters_" + trace["instanceId"]]
        bundle.entity(parameters_id, dic)
        bundle.used(process_id, parameters_id,
                    identifier=vc["used_" + trace["iterationId"]])

        # Input dependencies are added as used entities.
        for x in trace["derivationIds"]:
            bundle.used(process_id, vc[x["DerivedFromDatasetID"]],
                        identifier=vc["used_" + x["DerivedFromDatasetID"]])

        # Output metadata: one collection per stream, one entity per item.
        for x in trace["streams"]:
            i = 0
            parent_dic = {}
            for key in x:
                if key == 'con:immediateAccess':
                    parent_dic[vc['immediateAccess']] = x[key]
                elif key == 'location':
                    parent_dic["prov:location"] = str(x[key])
                else:
                    parent_dic[vc[key]] = str(x[key])

            c1 = bundle.collection(vc[x["id"]], other_attributes=parent_dic)
            bundle.wasGeneratedBy(vc[x["id"]], process_id,
                                  identifier=vc["wgb_" + x["id"]])

            for d in trace['derivationIds']:
                bundle.wasDerivedFrom(vc[x["id"]],
                                      vc[d['DerivedFromDatasetID']],
                                      identifier=vc["wdf_" + x["id"]])

            for y in x["content"]:
                if isinstance(y, dict):
                    dic = {}
                    for key in y:
                        # Use the numeric form when ``num`` accepts the
                        # value, the string form otherwise.
                        try:
                            val = num(y[key])
                        except Exception:
                            val = str(y[key])

                        if ':' in key:
                            dic[key] = val
                        else:
                            dic[vc[key]] = val
                else:
                    dic = {vc['text']: y}

                dic.update({"verce:parent_entity": vc["data_" + x["id"]]})

                # Progress output, kept from the original implementation.
                print(x["id"])
                print(str(i))
                print(dic)

                data_id = vc["data_" + x["id"] + "_" + str(i)]
                e1 = bundle.entity(data_id, dic)

                bundle.hadMember(c1, e1)
                bundle.wasGeneratedBy(
                    data_id, process_id,
                    identifier=vc["wgb_" + x["id"] + "_" + str(i)])

                for d in trace['derivationIds']:
                    bundle.wasDerivedFrom(
                        data_id, vc[d['DerivedFromDatasetID']],
                        identifier=vc["wdf_" + "data_" + x["id"] + "_" + str(i)])

                i = i + 1

    return g
Beispiel #41
0
 def document_1(self):
     """Return a document holding one entity ``e1`` in the ``ex`` namespace."""
     document = ProvDocument()
     namespace = document.add_namespace('ex', EX_URI)
     entity_name = namespace['e1']
     document.entity(entity_name)
     return document
Beispiel #42
0
class NIDMExporter():

    """
    Generic class to parse a result directory to extract the pieces of
    information to be stored in NIDM-Results and to generate a NIDM-Results
    export.
    """

    def __init__(self, version, out_dir, zipped=True):
        """
        Set up the output location, the parsed version and the prov document.

        :param version: NIDM-Results version, either "dev" or a string of the
            form "major.minor.revision", where revision may carry an
            "-rc<N>" suffix.
        :param out_dir: output path without extension; ".nidm.zip" (zipped)
            or ".nidm" (not zipped) is appended to its last component.
        :param zipped: selects the ".nidm.zip" target when True (default),
            the ".nidm" target otherwise.

        If the target path already exists, the user is asked on standard
        input whether to overwrite it: answering anything but "y" exits the
        program (SystemExit), answering "y" deletes the existing target.

        A temporary "nidm-*" directory is created next to the target to hold
        the exported data.
        """
        # Local import: only needed to exit when the user declines.
        import sys

        out_dirname = os.path.basename(out_dir)
        out_path = os.path.dirname(out_dir)

        # Create output path from output name
        self.zipped = zipped
        if not self.zipped:
            out_dirname = out_dirname+".nidm"
        else:
            out_dirname = out_dirname+".nidm.zip"
        out_dir = os.path.join(out_path, out_dirname)

        # Quit if output path already exists and user doesn't want to overwrite
        # it
        if os.path.exists(out_dir):
            msg = out_dir+" already exists, overwrite?"
            if input("%s (y/N) " % msg).lower() != 'y':
                # sys.exit() instead of quit(): quit() is injected by the
                # site module for interactive sessions and is not guaranteed
                # to exist (e.g. "python -S", frozen or embedded interpreters)
                sys.exit("Bye.")
            if os.path.isdir(out_dir):
                shutil.rmtree(out_dir)
            else:
                os.remove(out_dir)
        self.out_dir = out_dir

        if version == "dev":
            # The development version sorts after any released version;
            # note that no 'rc' key is set in this case
            self.version = {'major': 10000, 'minor': 0, 'revision': 0,
                            'num': version}
        else:
            major, minor, revision = version.split(".")
            if "-rc" in revision:
                revision, rc = revision.split("-rc")
            else:
                # -1 marks a version that is not a release candidate
                rc = -1
            self.version = {'major': int(major), 'minor': int(minor),
                            'revision': int(revision), 'rc': int(rc),
                            'num': version}

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()

        # A temp directory that will contain the exported data
        self.export_dir = tempfile.mkdtemp(prefix="nidm-", dir=out_path)

        self.prepend_path = ''

    def parse(self):
        """
        Parse a result directory to extract the pieces information to be
        stored in NIDM-Results.
        """

        try:
            # Methods: find_software, find_model_fitting, find_contrasts and
            # find_inferences should be defined in the children classes and
            # return a list of NIDM Objects as specified in the objects module

            # Object of type Software describing the neuroimaging software
            # package used for the analysis
            self.software = self._find_software()

            # List of objects of type ModelFitting describing the
            # model fitting step in NIDM-Results (main activity: Model
            # Parameters Estimation)
            self.model_fittings = self._find_model_fitting()

            # Dictionary of (key, value) pairs where where key is a tuple
            # containing the identifier of a ModelParametersEstimation object
            # and a tuple of identifiers of ParameterEstimateMap objects and
            # value is an object of type Contrast describing the contrast
            # estimation step in NIDM-Results (main activity: Contrast
            # Estimation)
            self.contrasts = self._find_contrasts()

            # Inference activity and entities
            # Dictionary of (key, value) pairs where key is the identifier of a
            # ContrastEstimation object and value is an object of type
            # Inference describing the inference step in NIDM-Results (main
            # activity: Inference)
            self.inferences = self._find_inferences()
        except Exception:
            self.cleanup()
            raise

    def cleanup(self):
        if os.path.isdir(self.export_dir):
            shutil.rmtree(self.export_dir)

    def add_object(self, nidm_object, export_file=True):
        """
        Export a NIDMObject and register it in the NIDM-Results bundle.

        :param nidm_object: object providing 'export', 'prov_type', 'id' and
            'attributes'.
        :param export_file: when False the object is exported with None in
            place of the temporary export directory.
        """
        target_dir = self.export_dir if export_file else None

        # NIDMFile objects additionally receive the path to prepend
        if isinstance(nidm_object, NIDMFile):
            nidm_object.export(self.version, target_dir, self.prepend_path)
        else:
            nidm_object.export(self.version, target_dir)

        # ProvDocument: select the bundle method matching the PROV type;
        # objects of any other type are not added to the bundle
        prov_type = nidm_object.prov_type
        if prov_type == PROV['Activity']:
            register = self.bundle.activity
        elif prov_type == PROV['Entity']:
            register = self.bundle.entity
        elif prov_type == PROV['Agent']:
            register = self.bundle.agent
        else:
            return

        register(nidm_object.id, other_attributes=nidm_object.attributes)

    def export(self):
        """
        Generate a NIDM-Results export.

        Reads self.software, self.model_fittings, self.contrasts and
        self.inferences (the attributes populated by 'parse'), adds the
        corresponding objects and PROV relations to a newly created bundle,
        then writes the serialisations with 'save_prov_to_files'.

        :return: self.out_dir, the path of the export.

        If anything fails the temporary export directory is removed
        ('cleanup') and the exception is re-raised.
        """
        try:
            if not os.path.isdir(self.export_dir):
                os.mkdir(self.export_dir)

            # Initialise main bundle
            self._create_bundle(self.version)

            self.add_object(self.software)

            # Add model fitting steps
            # (anything that is not a list is replaced by the list of its
            # values() -- presumably a dict of model fittings)
            if not isinstance(self.model_fittings, list):
                self.model_fittings = list(self.model_fittings.values())

            for model_fitting in self.model_fittings:
                # Design Matrix
                # model_fitting.activity.used(model_fitting.design_matrix)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.design_matrix.id)
                self.add_object(model_fitting.design_matrix)
                # *** Export visualisation of the design matrix
                self.add_object(model_fitting.design_matrix.image)

                if model_fitting.design_matrix.image.file is not None:
                    self.add_object(model_fitting.design_matrix.image.file)

                # NOTE(review): the guard tests 'hrf_models' but the object
                # added is 'drift_model' -- confirm this is the intended
                # condition
                if model_fitting.design_matrix.hrf_models is not None:
                    # drift model
                    self.add_object(model_fitting.design_matrix.drift_model)

                # Machine and subjects are only exported for versions >= 1.3
                if self.version['major'] > 1 or \
                        (self.version['major'] == 1 and
                         self.version['minor'] >= 3):
                    # Machine
                    # model_fitting.data.wasAttributedTo(model_fitting.machine)
                    self.bundle.wasAttributedTo(model_fitting.data.id,
                                                model_fitting.machine.id)
                    self.add_object(model_fitting.machine)

                    # Imaged subject or group(s)
                    for sub in model_fitting.subjects:
                        self.add_object(sub)
                        # model_fitting.data.wasAttributedTo(sub)
                        self.bundle.wasAttributedTo(model_fitting.data.id,
                                                    sub.id)

                # Data
                # model_fitting.activity.used(model_fitting.data)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.data.id)
                self.add_object(model_fitting.data)

                # Error Model
                # model_fitting.activity.used(model_fitting.error_model)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.error_model.id)
                self.add_object(model_fitting.error_model)

                # Parameter Estimate Maps
                for param_estimate in model_fitting.param_estimates:
                    # param_estimate.wasGeneratedBy(model_fitting.activity)
                    self.bundle.wasGeneratedBy(param_estimate.id,
                                               model_fitting.activity.id)
                    self.add_object(param_estimate)
                    self.add_object(param_estimate.coord_space)
                    self.add_object(param_estimate.file)

                    if param_estimate.derfrom is not None:
                        self.bundle.wasDerivedFrom(param_estimate.id,
                                                   param_estimate.derfrom.id)
                        self.add_object(param_estimate.derfrom)
                        # The file of the source map is added with
                        # export_file=False (no export directory is passed)
                        self.add_object(param_estimate.derfrom.file,
                                        export_file=False)

                # Residual Mean Squares Map
                # model_fitting.rms_map.wasGeneratedBy(model_fitting.activity)
                self.add_object(model_fitting.rms_map)
                self.bundle.wasGeneratedBy(model_fitting.rms_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.rms_map.coord_space)
                self.add_object(model_fitting.rms_map.file)
                if model_fitting.rms_map.derfrom is not None:
                    self.bundle.wasDerivedFrom(
                        model_fitting.rms_map.id,
                        model_fitting.rms_map.derfrom.id)
                    self.add_object(model_fitting.rms_map.derfrom)
                    self.add_object(model_fitting.rms_map.derfrom.file,
                                    export_file=False)

                # Resels per Voxel Map
                if model_fitting.rpv_map is not None:
                    self.add_object(model_fitting.rpv_map)
                    self.bundle.wasGeneratedBy(model_fitting.rpv_map.id,
                                               model_fitting.activity.id)
                    self.add_object(model_fitting.rpv_map.coord_space)
                    self.add_object(model_fitting.rpv_map.file)
                    # When an inference identifier is attached to the map,
                    # record that this inference used the map
                    if model_fitting.rpv_map.inf_id is not None:
                        self.bundle.used(model_fitting.rpv_map.inf_id,
                                         model_fitting.rpv_map.id)
                    if model_fitting.rpv_map.derfrom is not None:
                        self.bundle.wasDerivedFrom(
                            model_fitting.rpv_map.id,
                            model_fitting.rpv_map.derfrom.id)
                        self.add_object(model_fitting.rpv_map.derfrom)
                        self.add_object(model_fitting.rpv_map.derfrom.file,
                                        export_file=False)

                # Mask
                # model_fitting.mask_map.wasGeneratedBy(model_fitting.activity)
                self.bundle.wasGeneratedBy(model_fitting.mask_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.mask_map)
                if model_fitting.mask_map.derfrom is not None:
                    self.bundle.wasDerivedFrom(
                        model_fitting.mask_map.id,
                        model_fitting.mask_map.derfrom.id)
                    self.add_object(model_fitting.mask_map.derfrom)
                    self.add_object(model_fitting.mask_map.derfrom.file,
                                    export_file=False)

                # Create coordinate space export
                self.add_object(model_fitting.mask_map.coord_space)
                # Create "Mask map" entity
                self.add_object(model_fitting.mask_map.file)

                # Grand Mean map
                # model_fitting.grand_mean_map.wasGeneratedBy(model_fitting.activity)
                self.bundle.wasGeneratedBy(model_fitting.grand_mean_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.grand_mean_map)
                # Coordinate space entity
                self.add_object(model_fitting.grand_mean_map.coord_space)
                # Grand Mean Map entity
                self.add_object(model_fitting.grand_mean_map.file)

                # Model Parameters Estimation activity
                self.add_object(model_fitting.activity)
                self.bundle.wasAssociatedWith(model_fitting.activity.id,
                                              self.software.id)
                # model_fitting.activity.wasAssociatedWith(self.software)
                # self.add_object(model_fitting)

            # Add contrast estimation steps
            # analysis_masks maps each contrast estimation id to the id of
            # the mask map of its model fitting; it is read again in the
            # inference steps below
            analysis_masks = dict()
            for (model_fitting_id, pe_ids), contrasts in list(
                    self.contrasts.items()):
                for contrast in contrasts:
                    model_fitting = self._get_model_fitting(model_fitting_id)
                    # for contrast in contrasts:
                    # contrast.estimation.used(model_fitting.rms_map)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.rms_map.id)
                    # contrast.estimation.used(model_fitting.mask_map)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.mask_map.id)
                    analysis_masks[contrast.estimation.id] = \
                        model_fitting.mask_map.id
                    self.bundle.used(contrast.estimation.id,
                                     contrast.weights.id)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.design_matrix.id)
                    # contrast.estimation.wasAssociatedWith(self.software)
                    self.bundle.wasAssociatedWith(contrast.estimation.id,
                                                  self.software.id)

                    for pe_id in pe_ids:
                        # contrast.estimation.used(pe_id)
                        self.bundle.used(contrast.estimation.id, pe_id)

                    # Create estimation activity
                    self.add_object(contrast.estimation)

                    # Create contrast weights
                    self.add_object(contrast.weights)

                    if contrast.contrast_map is not None:
                        # Create contrast Map
                        # contrast.contrast_map.wasGeneratedBy(contrast.estimation)
                        self.bundle.wasGeneratedBy(contrast.contrast_map.id,
                                                   contrast.estimation.id)
                        self.add_object(contrast.contrast_map)
                        self.add_object(contrast.contrast_map.coord_space)
                        # Copy contrast map in export directory
                        self.add_object(contrast.contrast_map.file)

                        if contrast.contrast_map.derfrom is not None:
                            self.bundle.wasDerivedFrom(
                                contrast.contrast_map.id,
                                contrast.contrast_map.derfrom.id)
                            self.add_object(contrast.contrast_map.derfrom)
                            self.add_object(contrast.contrast_map.derfrom.file,
                                            export_file=False)

                    # Create Std Err. Map (T-tests) or Explained Mean Sq. Map
                    # (F-tests)
                    # contrast.stderr_or_expl_mean_sq_map.wasGeneratedBy
                    # (contrast.estimation)
                    stderr_explmeansq_map = (
                        contrast.stderr_or_expl_mean_sq_map)
                    self.bundle.wasGeneratedBy(
                        stderr_explmeansq_map.id,
                        contrast.estimation.id)
                    self.add_object(stderr_explmeansq_map)
                    self.add_object(
                        stderr_explmeansq_map.coord_space)
                    # A standard error map that carries a contrast variance
                    # map is recorded as derived from that variance map
                    if isinstance(stderr_explmeansq_map,
                                  ContrastStdErrMap) and \
                            stderr_explmeansq_map.contrast_var:
                        self.add_object(
                            stderr_explmeansq_map.contrast_var)
                        if stderr_explmeansq_map.var_coord_space:
                            self.add_object(
                                stderr_explmeansq_map.var_coord_space)
                        if stderr_explmeansq_map.contrast_var.coord_space:
                            self.add_object(
                                stderr_explmeansq_map.contrast_var.coord_space)
                        self.add_object(
                            stderr_explmeansq_map.contrast_var.file,
                            export_file=False)
                        self.bundle.wasDerivedFrom(
                            stderr_explmeansq_map.id,
                            stderr_explmeansq_map.contrast_var.id)
                    self.add_object(stderr_explmeansq_map.file)

                    # Create Statistic Map
                    # contrast.stat_map.wasGeneratedBy(contrast.estimation)
                    self.bundle.wasGeneratedBy(contrast.stat_map.id,
                                               contrast.estimation.id)
                    self.add_object(contrast.stat_map)
                    self.add_object(contrast.stat_map.coord_space)
                    # Copy Statistical map in export directory
                    self.add_object(contrast.stat_map.file)

                    if contrast.stat_map.derfrom is not None:
                        self.bundle.wasDerivedFrom(
                            contrast.stat_map.id,
                            contrast.stat_map.derfrom.id)
                        self.add_object(contrast.stat_map.derfrom)
                        self.add_object(contrast.stat_map.derfrom.file,
                                        export_file=False)

                    # Create Z Statistic Map
                    if contrast.z_stat_map:
                        # contrast.z_stat_map.wasGeneratedBy(contrast.estimation)
                        self.bundle.wasGeneratedBy(contrast.z_stat_map.id,
                                                   contrast.estimation.id)
                        self.add_object(contrast.z_stat_map)
                        self.add_object(contrast.z_stat_map.coord_space)
                        # Copy Statistical map in export directory
                        self.add_object(contrast.z_stat_map.file)

                    # self.add_object(contrast)

            # Add inference steps
            for contrast_id, inferences in list(self.inferences.items()):
                contrast = self._get_contrast(contrast_id)

                for inference in inferences:
                    # The inference uses the Z statistic map when there is
                    # one and the statistic map otherwise
                    if contrast.z_stat_map:
                        used_id = contrast.z_stat_map.id
                    else:
                        used_id = contrast.stat_map.id
                    # inference.inference_act.used(used_id)
                    self.bundle.used(inference.inference_act.id, used_id)
                    # inference.inference_act.wasAssociatedWith(self.software)
                    self.bundle.wasAssociatedWith(inference.inference_act.id,
                                                  self.software.id)

                    # self.add_object(inference)
                    # Excursion set
                    # inference.excursion_set.wasGeneratedBy(inference.inference_act)
                    self.bundle.wasGeneratedBy(inference.excursion_set.id,
                                               inference.inference_act.id)
                    self.add_object(inference.excursion_set)
                    self.add_object(inference.excursion_set.coord_space)
                    if inference.excursion_set.visu is not None:
                        self.add_object(inference.excursion_set.visu)
                        if inference.excursion_set.visu.file is not None:
                            self.add_object(inference.excursion_set.visu.file)
                    # Copy "Excursion set map" file in export directory
                    self.add_object(inference.excursion_set.file)
                    if inference.excursion_set.clust_map is not None:
                        self.add_object(inference.excursion_set.clust_map)
                        self.add_object(inference.excursion_set.clust_map.file)
                        self.add_object(
                            inference.excursion_set.clust_map.coord_space)

                    if inference.excursion_set.mip is not None:
                        self.add_object(inference.excursion_set.mip)
                        self.add_object(inference.excursion_set.mip.file)

                    # Height threshold
                    if inference.height_thresh.equiv_thresh is not None:
                        for equiv in inference.height_thresh.equiv_thresh:
                            self.add_object(equiv)
                    self.add_object(inference.height_thresh)

                    # Extent threshold
                    if inference.extent_thresh.equiv_thresh is not None:
                        for equiv in inference.extent_thresh.equiv_thresh:
                            self.add_object(equiv)
                    self.add_object(inference.extent_thresh)

                    # Display Mask (potentially more than 1)
                    if inference.disp_mask:
                        for mask in inference.disp_mask:
                            # inference.inference_act.used(mask)
                            self.bundle.used(inference.inference_act.id,
                                             mask.id)
                            self.add_object(mask)
                            # Create coordinate space entity
                            self.add_object(mask.coord_space)
                            # Create "Display Mask Map" entity
                            self.add_object(mask.file)

                            if mask.derfrom is not None:
                                self.bundle.wasDerivedFrom(mask.id,
                                                           mask.derfrom.id)
                                self.add_object(mask.derfrom)
                                self.add_object(mask.derfrom.file,
                                                export_file=False)

                    # Search Space
                    self.bundle.wasGeneratedBy(inference.search_space.id,
                                               inference.inference_act.id)
                    # inference.search_space.wasGeneratedBy(inference.inference_act)
                    self.add_object(inference.search_space)
                    self.add_object(inference.search_space.coord_space)
                    # Copy "Mask map" in export directory
                    self.add_object(inference.search_space.file)

                    # Peak Definition
                    if inference.peak_criteria:
                        # inference.inference_act.used(inference.peak_criteria)
                        self.bundle.used(inference.inference_act.id,
                                         inference.peak_criteria.id)
                        self.add_object(inference.peak_criteria)

                    # Cluster Definition
                    if inference.cluster_criteria:
                        # inference.inference_act.used(inference.cluster_criteria)
                        self.bundle.used(inference.inference_act.id,
                                         inference.cluster_criteria.id)
                        self.add_object(inference.cluster_criteria)

                    if inference.clusters:
                        # Clusters and peaks
                        for cluster in inference.clusters:
                            # cluster.wasDerivedFrom(inference.excursion_set)
                            self.bundle.wasDerivedFrom(
                                cluster.id, inference.excursion_set.id)
                            self.add_object(cluster)
                            for peak in cluster.peaks:
                                self.bundle.wasDerivedFrom(peak.id, cluster.id)
                                self.add_object(peak)
                                self.add_object(peak.coordinate)

                            # Centre of gravity of the cluster -- presumably
                            # what 'cog' stands for; confirm
                            if cluster.cog is not None:
                                self.bundle.wasDerivedFrom(cluster.cog.id,
                                                           cluster.id)
                                self.add_object(cluster.cog)
                                self.add_object(cluster.cog.coordinate)

                    # Inference activity
                    # inference.inference_act.wasAssociatedWith(inference.software_id)
                    # inference.inference_act.used(inference.height_thresh)
                    self.bundle.used(inference.inference_act.id,
                                     inference.height_thresh.id)
                    # inference.inference_act.used(inference.extent_thresh)
                    self.bundle.used(inference.inference_act.id,
                                     inference.extent_thresh.id)
                    # The inference also uses the mask map recorded for this
                    # contrast during the contrast estimation steps
                    self.bundle.used(inference.inference_act.id,
                                     analysis_masks[contrast.estimation.id])
                    self.add_object(inference.inference_act)

            # Write-out prov file
            self.save_prov_to_files()

            return self.out_dir
        except Exception:
            # Do not leave the temporary export directory behind on failure
            self.cleanup()
            raise

    def _get_model_fitting(self, mf_id):
        """
        Retreive model fitting with identifier 'mf_id' from the list of model
        fitting objects stored in self.model_fitting
        """
        for model_fitting in self.model_fittings:
            if model_fitting.activity.id == mf_id:
                return model_fitting

        raise Exception("Model fitting activity with id: " + str(mf_id) +
                        " not found.")

    def _get_contrast(self, con_id):
        """
        Retreive contrast with identifier 'con_id' from the list of contrast
        objects stored in self.contrasts
        """
        for contrasts in list(self.contrasts.values()):
            for contrast in contrasts:
                if contrast.estimation.id == con_id:
                    return contrast
        raise Exception("Contrast activity with id: " + str(con_id) +
                        " not found.")

    def _add_namespaces(self):
        """
        Register on the NIDM document the namespaces used by the export.
        """
        # Registration order is kept: NIDM first, NIF last
        namespaces = (NIDM, NIIRI, CRYPTO, DCT, DC, NFO, OBO, SCR, NIF)
        for namespace in namespaces:
            self.doc.add_namespace(namespace)

    def _create_bundle(self, version):
        """
        Initialise the NIDM-Results bundle and describe it in the document.

        :param version: version dictionary; only its 'num' entry is read
            here (exports use self.version).

        The bundle entity, the export activity, the exporter agent and the
        export time are created on first use and reused afterwards. Versions
        "1.0.0" and "1.1.0" get neither export activity nor exporter agent.
        """
        # *** Bundle entity
        if not hasattr(self, 'bundle_ent'):
            self.bundle_ent = NIDMResultsBundle(nidm_version=version['num'])
        bundle_ent = self.bundle_ent

        self.bundle = ProvBundle(identifier=bundle_ent.id)
        bundle_ent.export(self.version, self.export_dir)
        self.doc.entity(bundle_ent.id,
                        other_attributes=bundle_ent.attributes)

        legacy = version['num'] in ["1.0.0", "1.1.0"]

        # *** NIDM-Results Export Activity
        if not legacy:
            if not hasattr(self, 'export_act'):
                self.export_act = NIDMResultsExport()
            export_act = self.export_act
            export_act.export(self.version, self.export_dir)
            self.doc.activity(export_act.id,
                              other_attributes=export_act.attributes)

        # *** bundle was Generated by NIDM-Results Export Activity
        if not hasattr(self, 'export_time'):
            self.export_time = str(datetime.datetime.now().time())

        generation = {'entity': self.bundle_ent.id,
                      'time': self.export_time}
        if not legacy:
            # Only non-legacy versions name the generating activity
            generation['activity'] = self.export_act.id
        self.doc.wasGeneratedBy(**generation)

        # *** NIDM-Results Exporter (Software Agent)
        if not legacy:
            if not hasattr(self, 'exporter'):
                self.exporter = self._get_exporter()
            exporter = self.exporter
            exporter.export(self.version, self.export_dir)
            self.doc.agent(exporter.id,
                           other_attributes=exporter.attributes)

            self.doc.wasAssociatedWith(self.export_act.id, self.exporter.id)

    def _get_model_parameters_estimations(self, error_model):
        """
        Build a ModelParametersEstimation object whose estimation method is
        inferred from 'error_model'.

        Independent errors give STATO_OLS when the variance is homogeneous
        ('variance_homo') and STATO_WLS otherwise; any other error
        dependence gives STATO_GLS.
        """
        independent = error_model.dependance == NIDM_INDEPEDENT_ERROR

        if not independent:
            method = STATO_GLS
        elif error_model.variance_homo:
            method = STATO_OLS
        else:
            method = STATO_WLS

        return ModelParametersEstimation(method, self.software.id)

    def use_prefixes(self, ttl):
        prefix_file = os.path.join(os.path.dirname(__file__), 'prefixes.csv')
        context = dict()
        with open(prefix_file, encoding="ascii") as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # skip the headers
            for alphanum_id, prefix, uri in reader:
                if alphanum_id in ttl:
                    context[prefix] = uri
                    ttl = "@prefix " + prefix + ": <" + uri + "> .\n" + ttl
                    ttl = ttl.replace(alphanum_id, prefix + ":")
                    if uri in ttl:
                        ttl = ttl.replace(alphanum_id, prefix + ":")
                elif uri in ttl:
                    context[prefix] = uri
                    ttl = "@prefix " + prefix + ": <" + uri + "> .\n" + ttl
                    ttl = ttl.replace(alphanum_id, prefix + ":")
        return (ttl, context)

    def save_prov_to_files(self, showattributes=False):
        """
        Write-out provn serialisation to nidm.provn.
        """
        self.doc.add_bundle(self.bundle)
        # provn_file = os.path.join(self.export_dir, 'nidm.provn')
        # provn_fid = open(provn_file, 'w')
        # # FIXME None
        # # provn_fid.write(self.doc.get_provn(4).replace("None", "-"))
        # provn_fid.close()

        ttl_file = os.path.join(self.export_dir, 'nidm.ttl')
        ttl_txt = self.doc.serialize(format='rdf', rdf_format='turtle')
        ttl_txt, json_context = self.use_prefixes(ttl_txt)

        # Add namespaces to json-ld context
        for namespace in self.doc._namespaces.get_registered_namespaces():
            json_context[namespace._prefix] = namespace._uri
        for namespace in \
                list(self.doc._namespaces._default_namespaces.values()):
            json_context[namespace._prefix] = namespace._uri
        json_context["xsd"] = "http://www.w3.org/2000/01/rdf-schema#"

        # Work-around to issue with INF value in rdflib (reported in
        # https://github.com/RDFLib/rdflib/pull/655)
        ttl_txt = ttl_txt.replace(' inf ', ' "INF"^^xsd:float ')
        with open(ttl_file, 'w') as ttl_fid:
            ttl_fid.write(ttl_txt)

        # print(json_context)
        jsonld_file = os.path.join(self.export_dir, 'nidm.json')
        jsonld_txt = self.doc.serialize(format='rdf', rdf_format='json-ld',
                                        context=json_context)
        with open(jsonld_file, 'w') as jsonld_fid:
            jsonld_fid.write(jsonld_txt)

        # provjsonld_file = os.path.join(self.export_dir, 'nidm.provjsonld')
        # provjsonld_txt = self.doc.serialize(format='jsonld')
        # with open(provjsonld_file, 'w') as provjsonld_fid:
        #     provjsonld_fid.write(provjsonld_txt)

        # provn_file = os.path.join(self.export_dir, 'nidm.provn')
        # provn_txt = self.doc.serialize(format='provn')
        # with open(provn_file, 'w') as provn_fid:
        #     provn_fid.write(provn_txt)

        # Post-processing
        if not self.zipped:
            # Just rename temp directory to output_path
            os.rename(self.export_dir, self.out_dir)
        else:
            # Create a zip file that contains the content of the temp directory
            os.chdir(self.export_dir)
            zf = zipfile.ZipFile(os.path.join("..", self.out_dir), mode='w')
            try:
                for root, dirnames, filenames in os.walk("."):
                    for filename in filenames:
                        zf.write(os.path.join(filename))
            finally:
                zf.close()
                # Need to move up before deleting the folder
                os.chdir("..")
                shutil.rmtree(os.path.join("..", self.export_dir))
def primer_example():
    """Build the PROV primer example as a ``ProvDocument``.

    This is a statement-by-statement transcription of the PROV-N document at
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/primer.pn
    Each call below is preceded by a comment quoting the PROV-N statement it
    transcribes.

    Returns:
        The populated ``ProvDocument``.
    """
    # document
    g = ProvDocument()

    #    prefix ex <http://example/>
    #    prefix dcterms <http://purl.org/dc/terms/>
    #    prefix foaf <http://xmlns.com/foaf/0.1/>
    ex = Namespace(
        "ex", "http://example/"
    )  # namespaces do not need to be explicitly added to a document
    g.add_namespace("dcterms", "http://purl.org/dc/terms/")
    g.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    #    entity(ex:article, [dcterms:title="Crime rises in cities"])
    # first time the ex namespace was used, it is added to the document automatically
    g.entity(ex["article"], {"dcterms:title": "Crime rises in cities"})
    #    entity(ex:articleV1)
    g.entity(ex["articleV1"])
    #    entity(ex:articleV2)
    g.entity(ex["articleV2"])
    #    entity(ex:dataSet1)
    g.entity(ex["dataSet1"])
    #    entity(ex:dataSet2)
    g.entity(ex["dataSet2"])
    #    entity(ex:regionList)
    g.entity(ex["regionList"])
    #    entity(ex:composition)
    g.entity(ex["composition"])
    #    entity(ex:chart1)
    g.entity(ex["chart1"])
    #    entity(ex:chart2)
    g.entity(ex["chart2"])
    #    entity(ex:blogEntry)
    g.entity(ex["blogEntry"])

    #    activity(ex:compile)
    g.activity(
        "ex:compile")  # since ex is registered, it can be used like this
    #    activity(ex:compile2)
    g.activity("ex:compile2")
    #    activity(ex:compose)
    g.activity("ex:compose")
    #    activity(ex:correct, 2012-03-31T09:21:00, 2012-04-01T15:21:00)
    g.activity("ex:correct", "2012-03-31T09:21:00",
               "2012-04-01T15:21:00")  # date time can be provided as strings
    #    activity(ex:illustrate)
    g.activity("ex:illustrate")

    #    used(ex:compose, ex:dataSet1, -,   [ prov:role = "ex:dataToCompose"])
    g.used("ex:compose",
           "ex:dataSet1",
           other_attributes={"prov:role": "ex:dataToCompose"})
    #    used(ex:compose, ex:regionList, -, [ prov:role = "ex:regionsToAggregateBy"])
    g.used(
        "ex:compose",
        "ex:regionList",
        other_attributes={"prov:role": "ex:regionsToAggregateBy"},
    )
    #    wasGeneratedBy(ex:composition, ex:compose, -)
    g.wasGeneratedBy("ex:composition", "ex:compose")

    #    used(ex:illustrate, ex:composition, -)
    g.used("ex:illustrate", "ex:composition")
    #    wasGeneratedBy(ex:chart1, ex:illustrate, -)
    g.wasGeneratedBy("ex:chart1", "ex:illustrate")

    #    wasGeneratedBy(ex:chart1, ex:compile,  2012-03-02T10:30:00)
    g.wasGeneratedBy("ex:chart1", "ex:compile", "2012-03-02T10:30:00")
    #    wasGeneratedBy(ex:chart2, ex:compile2, 2012-04-01T15:21:00)
    # This statement was previously quoted but never emitted.
    g.wasGeneratedBy("ex:chart2", "ex:compile2", "2012-04-01T15:21:00")
    #
    #
    #    agent(ex:derek, [ prov:type="prov:Person", foaf:givenName = "Derek",
    #           foaf:mbox= "<mailto:derek@example.org>"])
    g.agent(
        "ex:derek",
        {
            "prov:type": PROV["Person"],
            "foaf:givenName": "Derek",
            "foaf:mbox": "<mailto:derek@example.org>",
        },
    )
    #    wasAssociatedWith(ex:compose, ex:derek, -)
    g.wasAssociatedWith("ex:compose", "ex:derek")
    #    wasAssociatedWith(ex:illustrate, ex:derek, -)
    g.wasAssociatedWith("ex:illustrate", "ex:derek")
    #
    #    agent(ex:chartgen, [ prov:type="prov:Organization",
    #           foaf:name = "Chart Generators Inc"])
    g.agent(
        "ex:chartgen",
        {
            "prov:type": PROV["Organization"],
            "foaf:name": "Chart Generators Inc"
        },
    )
    #    actedOnBehalfOf(ex:derek, ex:chartgen, ex:compose)
    g.actedOnBehalfOf("ex:derek", "ex:chartgen", "ex:compose")
    #    wasAttributedTo(ex:chart1, ex:derek)
    g.wasAttributedTo("ex:chart1", "ex:derek")

    #    wasGeneratedBy(ex:dataSet2, ex:correct, -)
    g.wasGeneratedBy("ex:dataSet2", "ex:correct")
    #    used(ex:correct, ex:dataSet1, -)
    g.used("ex:correct", "ex:dataSet1")
    #    wasDerivedFrom(ex:dataSet2, ex:dataSet1, [prov:type='prov:Revision'])
    g.wasDerivedFrom("ex:dataSet2",
                     "ex:dataSet1",
                     other_attributes={"prov:type": PROV["Revision"]})
    #    wasDerivedFrom(ex:chart2, ex:dataSet2)
    g.wasDerivedFrom("ex:chart2", "ex:dataSet2")

    #    wasDerivedFrom(ex:blogEntry, ex:article, [prov:type='prov:Quotation'])
    g.wasDerivedFrom("ex:blogEntry",
                     "ex:article",
                     other_attributes={"prov:type": PROV["Quotation"]})
    #    specializationOf(ex:articleV1, ex:article)
    g.specializationOf("ex:articleV1", "ex:article")
    #    wasDerivedFrom(ex:articleV1, ex:dataSet1)
    g.wasDerivedFrom("ex:articleV1", "ex:dataSet1")

    #    specializationOf(ex:articleV2, ex:article)
    g.specializationOf("ex:articleV2", "ex:article")
    #    wasDerivedFrom(ex:articleV2, ex:dataSet2)
    g.wasDerivedFrom("ex:articleV2", "ex:dataSet2")

    #    alternateOf(ex:articleV2, ex:articleV1)
    g.alternateOf("ex:articleV2", "ex:articleV1")

    # endDocument
    return g
Beispiel #44
0
    def useGenDependency(self, aDO, usedList, genList, throughActivity):
        """Record that ``aDO`` generated ``genList`` from ``usedList``.

        The same set of PROV statements is written twice: into a fresh
        per-call document (the "provlet") and into the module-level
        ``pGlobal`` document. Every generated object gets the provlet
        attached as ``.provlet``, the ``upstream``/``downstream`` lists of
        the objects are extended, and the credit manager is notified.

        Args:
            aDO: the acting object; ``str(aDO.id)`` names the agent.
            usedList: objects consumed by the activity (each has ``.id``
                and ``.downstream``).
            genList: objects produced by the activity (each has ``.id``
                and ``.upstream``).
            throughActivity: the activity; ``throughActivity.id`` names it.

        Returns:
            The per-call ``ProvDocument`` (the provlet).
        """
        aID = throughActivity.id

        def record_dependency(doc):
            """Add the entity/activity/agent statements for this event to ``doc``."""
            used_entities = [doc.entity(DTns + ro.id) for ro in usedList]
            gen_entities = [doc.entity(DTns + ro.id) for ro in genList]

            activity = doc.activity(DTns + aID)
            agent = doc.agent(DTns + str(aDO.id))

            doc.wasAssociatedWith(activity, agent)
            for entity in used_entities:
                doc.used(activity, entity)
            for entity in gen_entities:
                doc.wasAttributedTo(entity, agent)
                doc.wasGeneratedBy(entity, activity)

        # create provlet
        d1 = ProvDocument()  # d1 is now an empty provenance document
        d1.add_namespace("dt", "http://cs.ncl.ac.uk/dtsim/")
        record_dependency(d1)

        # associate this provlet to each generated RO; remember the last id
        # for the log line below (None when the list is empty, instead of
        # relying on a leaked loop variable that may not exist)
        last_gen_id = None
        for gen_ro in genList:
            gen_ro.provlet = d1
            last_gen_id = gen_ro.id

        last_used_id = None
        for used_ro in usedList:
            last_used_id = used_ro.id

        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("event {n}: DO {do}: {ro1} <- wgby <- {act} <- used {ro}".format(
            n=currentReuseCount, do=aDO.id, ro1=last_gen_id, act=aID,
            ro=last_used_id
        ))

        for genRO in genList:
            for uRO in usedList:
                # update upstream pointer: genRO depends on uRO through the activity
                # FIXME (carried over from the original): not designed for many-to-many
                genRO.upstream.append((uRO, throughActivity))

        for uRO in usedList:
            for genRO in genList:
                # update downstream: genRO is downstream from uRO through the activity
                uRO.downstream.append((genRO, throughActivity))

        # update global graph
        record_dependency(pGlobal)

        # trigger credit recomputation for the used objects
        aCreditManager.addGenerationCredit(usedList, genList, throughActivity)

        return d1
def w3c_publication_2():
    """Build the "W3C publication 2" example as a ``ProvDocument``.

    Transcribes the PROV-ASN bundle published at
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn
    which reads::

        bundle

        prefix ex <http://example.org/>
        prefix rec <http://example.org/record>

        prefix w3 <http://www.w3.org/TR/2011/>
        prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>

        entity(hg:Overview.html, [ prov:type="file in hg" ])
        entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])

        activity(ex:rcp,-,-,[prov:type="copy directory"])

        wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)

        entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])

        used(rec:u; ex:rcp,hg:Overview.html,-)
        used(ex:rcp, ex:req3, -)

        wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)

        agent(ex:webmaster, [ prov:type='prov:Person' ])

        wasAssociatedWith(ex:rcp, ex:webmaster, -)

        endBundle

    Returns:
        The populated ``ProvDocument``.
    """
    hg_ns = Namespace(
        "hg",
        "http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/",
    )
    w3_ns = Namespace("w3", "http://www.w3.org/TR/2011/")
    rec_ns = Namespace("rec", "http://example.org/record")
    ex_ns = Namespace("ex", "http://example.org/")

    doc = ProvDocument()

    # The two document entities: the repository file and the published draft.
    repository_file = hg_ns["Overview.html"]
    doc.entity(repository_file, {"prov:type": "file in hg"})
    published_draft = w3_ns["WD-prov-dm-20111215"]
    doc.entity(published_draft, {"prov:type": "html4"})

    # The copy activity, with no start or end time.
    doc.activity(ex_ns["rcp"], other_attributes={"prov:type": "copy directory"})

    # Named generation record (rec:g), referenced by the derivation below.
    generation_id = rec_ns["g"]
    doc.wasGeneratedBy("w3:WD-prov-dm-20111215", "ex:rcp",
                       identifier=generation_id)

    request_type = Identifier(
        "http://www.w3.org/2005/08/01-transitions.html#pubreq")
    doc.entity("ex:req3", {"prov:type": request_type})

    # Named usage record (rec:u), also referenced by the derivation.
    doc.used("ex:rcp", "hg:Overview.html", identifier="rec:u")
    doc.used("ex:rcp", "ex:req3")

    doc.wasDerivedFrom(
        "w3:WD-prov-dm-20111215",
        "hg:Overview.html",
        "ex:rcp",
        "rec:g",
        "rec:u",
    )

    # NOTE(review): the transcribed statement gives the type as the
    # qualified name 'prov:Person'; a plain string is passed here - confirm
    # this is intended.
    doc.agent("ex:webmaster", {"prov:type": "Person"})

    doc.wasAssociatedWith("ex:rcp", "ex:webmaster")

    return doc
def w3c_publication_1():
    """Build the "W3C publication 1" example as a ``ProvDocument``.

    Transcribes the PROV-ASN bundle published at
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication1.prov-asn
    which reads::

        bundle

        prefix ex  <http://example.org/>

        prefix w3      <http://www.w3.org/>
        prefix tr      <http://www.w3.org/TR/2011/>
        prefix process <http://www.w3.org/2005/10/Process-20051014/tr.html#>
        prefix email   <https://lists.w3.org/Archives/Member/w3c-archive/>
        prefix chairs  <https://lists.w3.org/Archives/Member/chairs/>
        prefix trans   <http://www.w3.org/2005/08/01-transitions.html#>
        prefix rec54   <http://www.w3.org/2001/02pd/rec54#>

        entity(tr:WD-prov-dm-20111018, [ prov:type='rec54:WD' ])
        entity(tr:WD-prov-dm-20111215, [ prov:type='rec54:WD' ])
        entity(process:rec-advance,    [ prov:type='prov:Plan' ])

        entity(chairs:2011OctDec/0004, [ prov:type='trans:transreq' ])
        entity(email:2011Oct/0141,     [ prov:type='trans:pubreq' ])
        entity(email:2011Dec/0111,     [ prov:type='trans:pubreq' ])

        wasDerivedFrom(tr:WD-prov-dm-20111215, tr:WD-prov-dm-20111018)

        activity(ex:act1,-,-,[prov:type="publish"])
        activity(ex:act2,-,-,[prov:type="publish"])

        wasGeneratedBy(tr:WD-prov-dm-20111018, ex:act1, -)
        wasGeneratedBy(tr:WD-prov-dm-20111215, ex:act2, -)

        used(ex:act1, chairs:2011OctDec/0004, -)
        used(ex:act1, email:2011Oct/0141, -)
        used(ex:act2, email:2011Dec/0111, -)

        agent(w3:Consortium, [ prov:type='prov:Organization' ])

        wasAssociatedWith(ex:act1, w3:Consortium, process:rec-advance)
        wasAssociatedWith(ex:act2, w3:Consortium, process:rec-advance)

        endBundle

    Returns:
        The populated ``ProvDocument``.
    """
    doc = ProvDocument()

    prefixes = (
        ("ex", "http://example.org/"),
        ("w3", "http://www.w3.org/"),
        ("tr", "http://www.w3.org/TR/2011/"),
        ("process", "http://www.w3.org/2005/10/Process-20051014/tr.html#"),
        ("email", "https://lists.w3.org/Archives/Member/w3c-archive/"),
        ("chairs", "https://lists.w3.org/Archives/Member/chairs/"),
        ("trans", "http://www.w3.org/2005/08/01-transitions.html#"),
        ("rec54", "http://www.w3.org/2001/02pd/rec54#"),
    )
    for prefix, uri in prefixes:
        doc.add_namespace(prefix, uri)

    # NOTE(review): the transcribed statements write these types as
    # qualified names (e.g. 'rec54:WD'); plain strings are passed here -
    # confirm this is intended.
    typed_entities = (
        ("tr:WD-prov-dm-20111018", "rec54:WD"),
        ("tr:WD-prov-dm-20111215", "rec54:WD"),
        ("process:rec-advance", "prov:Plan"),
        ("chairs:2011OctDec/0004", "trans:transreq"),
        ("email:2011Oct/0141", "trans:pubreq"),
        ("email:2011Dec/0111", "trans:pubreq"),
    )
    for entity_id, entity_type in typed_entities:
        doc.entity(entity_id, {"prov:type": entity_type})

    doc.wasDerivedFrom("tr:WD-prov-dm-20111215", "tr:WD-prov-dm-20111018")

    publish_activities = ("ex:act1", "ex:act2")
    for activity_id in publish_activities:
        doc.activity(activity_id, other_attributes={"prov:type": "publish"})

    generations = (
        ("tr:WD-prov-dm-20111018", "ex:act1"),
        ("tr:WD-prov-dm-20111215", "ex:act2"),
    )
    for entity_id, activity_id in generations:
        doc.wasGeneratedBy(entity_id, activity_id)

    usages = (
        ("ex:act1", "chairs:2011OctDec/0004"),
        ("ex:act1", "email:2011Oct/0141"),
        ("ex:act2", "email:2011Dec/0111"),
    )
    for activity_id, entity_id in usages:
        doc.used(activity_id, entity_id)

    doc.agent("w3:Consortium", other_attributes={"prov:type": "Organization"})

    # Both activities were carried out by the consortium under the same plan.
    for activity_id in publish_activities:
        doc.wasAssociatedWith(activity_id, "w3:Consortium",
                              "process:rec-advance")

    return doc
Beispiel #47
0
def ctfToProv():
    """Convert the events of the global ``trace_collection`` to a PROV document.

    For every event an entity ``ex:event<N>`` is created (``N`` is the
    event's position in the trace) whose attributes are the event's
    ``EVENT_FIELDS``-scope fields plus its name, each under an ``ex:``
    prefixed key. Entities are grouped by the event's ``producer_id``.
    Then, per producer, an agent is created, a ``used`` relation from the
    controller agent is added, and consecutive entities of that producer
    are chained.

    Returns:
        The populated ``ProvDocument``.
    """
    d1 = ProvDocument()
    # Scratch document: only used to render a relation as a string so that
    # already-recorded relations can be recognised.
    dummy = ProvDocument()
    ex = Namespace('ex', 'http://example/')  # namespaces do not need to be explicitly added to a document
    relationships = set()
    can_events = defaultdict(list)

    # enumerate() supplies the per-event number; previously the counter was
    # never incremented, so every entity was identified as ex:event0.
    for counter, event in enumerate(trace_collection.events):
        dataset = {'ex:' + k: event[k] for k in event.field_list_with_scope(
            babeltrace.CTFScope.EVENT_FIELDS)}
        dataset['ex:' + 'name'] = event.name

        sa = event['producer_id']
        activity = event['activity']

        e1 = d1.entity(ex['event' + str(counter)], dataset)

        # Keep the entity together with the activity name of its event.
        entity_activity = entityActivity()
        entity_activity.addEntityActivity(e1, activity)
        can_events[sa].append(entity_activity)

        # After the loop this holds the agent of the LAST event's controller.
        controller_agent = d1.agent('ex:' + event['controller_id'])

    for key, producer_events in can_events.items():
        producer_agent = d1.agent('ex:' + str(key))

        used_relationship = str(dummy.used(controller_agent, producer_agent))
        if used_relationship not in relationships:
            d1.used(controller_agent, producer_agent)
            relationships.add(used_relationship)

        # Walk consecutive pairs. NOTE(review): as in the original loop
        # bounds, the last event of each producer gets no wasGeneratedBy /
        # activity association - confirm this is intended.
        for current, following in zip(producer_events, producer_events[1:]):
            d1.wasAssociatedWith(current.getEntity(), following.getEntity())
            # Pass the entity record itself, not the wrapper object.
            d1.wasGeneratedBy(current.getEntity(), current.getActivity())
            d1.wasAssociatedWith(current.getActivity(), producer_agent)

    return d1
def to_prov(obj, namespace, service):
    """Describe a registered service as a PROV document.

    Creates agents for two hard-coded people and their organisation, for
    the ADAMA platform, for the service looked up in ``service_store``
    under ``service_iden(namespace, service)``, and for each of the
    service's authors (plus a sponsor agent when the author names one).
    The service is then linked to its sources and to an
    ``adama_response`` entity.

    :param obj: not read by this function.
    :type obj: dict
    :param namespace: namespace used to look the service up.
    :param service: service name used to look the service up.
    :raises APIException: when an author entry lacks ``name`` or ``email``.
    :rtype: prov.model.ProvDocument
    """
    doc = ProvDocument()
    aip = Namespace('aip', 'https://araport.org/provenance/')

    for prefix, uri in (("dcterms", "http://purl.org/dc/terms/"),
                        ("foaf", "http://xmlns.com/foaf/0.1/")):
        doc.add_namespace(prefix, uri)

    # Hard coded for now
    vaughn = doc.agent(
        aip['matthew_vaughn'],
        {
            'prov:type': PROV["Person"],
            'foaf:givenName': "Matthew Vaughn",
            'foaf:mbox': "<mailto:[email protected]>",
        },
    )
    walter = doc.agent(
        aip['walter_moreira'],
        {
            'prov:type': PROV["Person"],
            'foaf:givenName': "Walter Moreira",
            'foaf:mbox': "<mailto:[email protected]>",
        },
    )
    utexas = doc.agent(
        aip['university_of_texas'],
        {
            'prov:type': PROV["Organization"],
            'foaf:givenName': "University of Texas at Austin",
        },
    )
    for person in (walter, vaughn):
        doc.actedOnBehalfOf(person, utexas)

    platform_attributes = {
        'dcterms:title': "ADAMA",
        'dcterms:description': "Araport Data And Microservices API",
        'dcterms:language': "en-US",
        'dcterms:identifier': "https://api.araport.org/community/v0.3/",
        'dcterms:updated': "2015-04-17T09:44:56",
    }
    adama_platform = doc.agent(aip['adama_platform'], platform_attributes)
    for person in (walter, vaughn):
        doc.wasGeneratedBy(adama_platform, person)

    # Look the service up and describe it as an agent of its own.
    iden = service_iden(namespace, service)
    srv = service_store[iden]['service']
    service_attributes = {
        'dcterms:title': srv.name.title(),
        'dcterms:description': srv.description,
        'dcterms:language': "en-US",
        'dcterms:identifier': api_url_for('service',
                                          namespace=namespace,
                                          service=service),
        'dcterms:source': srv.git_repository,
    }
    adama_microservice = doc.agent(aip[iden], service_attributes)

    doc.used(adama_microservice, adama_platform, datetime.datetime.now())

    # A service without an ``authors`` attribute contributes no author agents.
    for author in getattr(srv, 'authors', []):
        try:
            author_name, author_email = author['name'], author['email']
        except KeyError:
            raise APIException(
                'name and email are required in author field')

        author_agent = doc.agent(
            aip[slugify(author_name)],
            {
                'prov:type': PROV['Person'],
                'foaf:givenName': author_name,
                'foaf:mbox': '<mailto:{}>'.format(author_email),
            },
        )

        sponsor_name = author.get('sponsor_organization_name')
        if sponsor_name:
            sponsor_agent = doc.agent(
                aip[slugify(sponsor_name)],
                {
                    'prov:type': PROV['Organization'],
                    'foaf:givenName': sponsor_name,
                    'dcterms:identifier': author.get('sponsor_uri', ''),
                },
            )
            doc.actedOnBehalfOf(author_agent, sponsor_agent)

        doc.wasGeneratedBy(adama_microservice, author_agent,
                           datetime.datetime.now())

    for src in process_sources(srv.sources, doc, aip):
        doc.used(adama_microservice, src, datetime.datetime.now())

    response = doc.entity(aip['adama_response'])
    doc.wasGeneratedBy(response, aip[srv.type], datetime.datetime.now())
    doc.used(aip[srv.type], adama_microservice, datetime.datetime.now())

    return doc
Beispiel #49
0
def primer_example():
    """Build the PROV primer example as a ``ProvDocument``.

    This is a statement-by-statement transcription of the PROV-N document at
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/primer.pn
    Each call below is preceded by a comment quoting the PROV-N statement it
    transcribes.

    Returns:
        The populated ``ProvDocument``.
    """
    # document
    g = ProvDocument()

    #    prefix ex <http://example/>
    #    prefix dcterms <http://purl.org/dc/terms/>
    #    prefix foaf <http://xmlns.com/foaf/0.1/>
    ex = Namespace('ex', 'http://example/')  # namespaces do not need to be explicitly added to a document
    g.add_namespace("dcterms", "http://purl.org/dc/terms/")
    g.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    #    entity(ex:article, [dcterms:title="Crime rises in cities"])
    # first time the ex namespace was used, it is added to the document automatically
    g.entity(ex['article'], {'dcterms:title': "Crime rises in cities"})
    #    entity(ex:articleV1)
    g.entity(ex['articleV1'])
    #    entity(ex:articleV2)
    g.entity(ex['articleV2'])
    #    entity(ex:dataSet1)
    g.entity(ex['dataSet1'])
    #    entity(ex:dataSet2)
    g.entity(ex['dataSet2'])
    #    entity(ex:regionList)
    g.entity(ex['regionList'])
    #    entity(ex:composition)
    g.entity(ex['composition'])
    #    entity(ex:chart1)
    g.entity(ex['chart1'])
    #    entity(ex:chart2)
    g.entity(ex['chart2'])
    #    entity(ex:blogEntry)
    g.entity(ex['blogEntry'])

    #    activity(ex:compile)
    g.activity('ex:compile')  # since ex is registered, it can be used like this
    #    activity(ex:compile2)
    g.activity('ex:compile2')
    #    activity(ex:compose)
    g.activity('ex:compose')
    #    activity(ex:correct, 2012-03-31T09:21:00, 2012-04-01T15:21:00)
    g.activity('ex:correct', '2012-03-31T09:21:00', '2012-04-01T15:21:00')  # date time can be provided as strings
    #    activity(ex:illustrate)
    g.activity('ex:illustrate')

    #    used(ex:compose, ex:dataSet1, -,   [ prov:role = "ex:dataToCompose"])
    g.used('ex:compose', 'ex:dataSet1', other_attributes={'prov:role': "ex:dataToCompose"})
    #    used(ex:compose, ex:regionList, -, [ prov:role = "ex:regionsToAggregateBy"])
    g.used('ex:compose', 'ex:regionList', other_attributes={'prov:role': "ex:regionsToAggregateBy"})
    #    wasGeneratedBy(ex:composition, ex:compose, -)
    g.wasGeneratedBy('ex:composition', 'ex:compose')

    #    used(ex:illustrate, ex:composition, -)
    g.used('ex:illustrate', 'ex:composition')
    #    wasGeneratedBy(ex:chart1, ex:illustrate, -)
    g.wasGeneratedBy('ex:chart1', 'ex:illustrate')

    #    wasGeneratedBy(ex:chart1, ex:compile,  2012-03-02T10:30:00)
    g.wasGeneratedBy('ex:chart1', 'ex:compile', '2012-03-02T10:30:00')
    #    wasGeneratedBy(ex:chart2, ex:compile2, 2012-04-01T15:21:00)
    # This statement was previously quoted but never emitted.
    g.wasGeneratedBy('ex:chart2', 'ex:compile2', '2012-04-01T15:21:00')
    #
    #
    #    agent(ex:derek, [ prov:type="prov:Person", foaf:givenName = "Derek",
    #           foaf:mbox= "<mailto:derek@example.org>"])
    g.agent('ex:derek', {
        'prov:type': PROV["Person"], 'foaf:givenName': "Derek", 'foaf:mbox': "<mailto:derek@example.org>"
    })
    #    wasAssociatedWith(ex:compose, ex:derek, -)
    g.wasAssociatedWith('ex:compose', 'ex:derek')
    #    wasAssociatedWith(ex:illustrate, ex:derek, -)
    g.wasAssociatedWith('ex:illustrate', 'ex:derek')
    #
    #    agent(ex:chartgen, [ prov:type="prov:Organization",
    #           foaf:name = "Chart Generators Inc"])
    g.agent('ex:chartgen', {'prov:type': PROV["Organization"], 'foaf:name': "Chart Generators Inc"})
    #    actedOnBehalfOf(ex:derek, ex:chartgen, ex:compose)
    g.actedOnBehalfOf('ex:derek', 'ex:chartgen', 'ex:compose')
    #    wasAttributedTo(ex:chart1, ex:derek)
    g.wasAttributedTo('ex:chart1', 'ex:derek')

    #    wasGeneratedBy(ex:dataSet2, ex:correct, -)
    g.wasGeneratedBy('ex:dataSet2', 'ex:correct')
    #    used(ex:correct, ex:dataSet1, -)
    g.used('ex:correct', 'ex:dataSet1')
    #    wasDerivedFrom(ex:dataSet2, ex:dataSet1, [prov:type='prov:Revision'])
    g.wasDerivedFrom('ex:dataSet2', 'ex:dataSet1', other_attributes={'prov:type': PROV['Revision']})
    #    wasDerivedFrom(ex:chart2, ex:dataSet2)
    g.wasDerivedFrom('ex:chart2', 'ex:dataSet2')

    #    wasDerivedFrom(ex:blogEntry, ex:article, [prov:type='prov:Quotation'])
    g.wasDerivedFrom('ex:blogEntry', 'ex:article', other_attributes={'prov:type': PROV['Quotation']})
    #    specializationOf(ex:articleV1, ex:article)
    g.specializationOf('ex:articleV1', 'ex:article')
    #    wasDerivedFrom(ex:articleV1, ex:dataSet1)
    g.wasDerivedFrom('ex:articleV1', 'ex:dataSet1')

    #    specializationOf(ex:articleV2, ex:article)
    g.specializationOf('ex:articleV2', 'ex:article')
    #    wasDerivedFrom(ex:articleV2, ex:dataSet2)
    g.wasDerivedFrom('ex:articleV2', 'ex:dataSet2')

    #    alternateOf(ex:articleV2, ex:articleV1)
    g.alternateOf('ex:articleV2', 'ex:articleV1')

    # endDocument
    return g
Beispiel #50
0
def w3c_publication_1():
    """Build the "W3C publication 1" example as a ``ProvDocument``.

    Transcribes the PROV-ASN bundle published at
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication1.prov-asn
    which reads::

        bundle

        prefix ex  <http://example.org/>

        prefix w3      <http://www.w3.org/>
        prefix tr      <http://www.w3.org/TR/2011/>
        prefix process <http://www.w3.org/2005/10/Process-20051014/tr.html#>
        prefix email   <https://lists.w3.org/Archives/Member/w3c-archive/>
        prefix chairs  <https://lists.w3.org/Archives/Member/chairs/>
        prefix trans   <http://www.w3.org/2005/08/01-transitions.html#>
        prefix rec54   <http://www.w3.org/2001/02pd/rec54#>

        entity(tr:WD-prov-dm-20111018, [ prov:type='rec54:WD' ])
        entity(tr:WD-prov-dm-20111215, [ prov:type='rec54:WD' ])
        entity(process:rec-advance,    [ prov:type='prov:Plan' ])

        entity(chairs:2011OctDec/0004, [ prov:type='trans:transreq' ])
        entity(email:2011Oct/0141,     [ prov:type='trans:pubreq' ])
        entity(email:2011Dec/0111,     [ prov:type='trans:pubreq' ])

        wasDerivedFrom(tr:WD-prov-dm-20111215, tr:WD-prov-dm-20111018)

        activity(ex:act1,-,-,[prov:type="publish"])
        activity(ex:act2,-,-,[prov:type="publish"])

        wasGeneratedBy(tr:WD-prov-dm-20111018, ex:act1, -)
        wasGeneratedBy(tr:WD-prov-dm-20111215, ex:act2, -)

        used(ex:act1, chairs:2011OctDec/0004, -)
        used(ex:act1, email:2011Oct/0141, -)
        used(ex:act2, email:2011Dec/0111, -)

        agent(w3:Consortium, [ prov:type='prov:Organization' ])

        wasAssociatedWith(ex:act1, w3:Consortium, process:rec-advance)
        wasAssociatedWith(ex:act2, w3:Consortium, process:rec-advance)

        endBundle

    Returns:
        The populated ``ProvDocument``.
    """
    doc = ProvDocument()

    # Prefix -> URI, registered in the order the bundle declares them.
    namespace_table = [
        ('ex', 'http://example.org/'),
        ('w3', 'http://www.w3.org/'),
        ('tr', 'http://www.w3.org/TR/2011/'),
        ('process', 'http://www.w3.org/2005/10/Process-20051014/tr.html#'),
        ('email', 'https://lists.w3.org/Archives/Member/w3c-archive/'),
        ('chairs', 'https://lists.w3.org/Archives/Member/chairs/'),
        ('trans', 'http://www.w3.org/2005/08/01-transitions.html#'),
        ('rec54', 'http://www.w3.org/2001/02pd/rec54#'),
    ]
    for prefix, uri in namespace_table:
        doc.add_namespace(prefix, uri)

    # NOTE(review): the transcribed statements write these types as
    # qualified names (e.g. 'rec54:WD'); plain strings are passed here -
    # confirm this is intended.
    entity_table = [
        ('tr:WD-prov-dm-20111018', 'rec54:WD'),
        ('tr:WD-prov-dm-20111215', 'rec54:WD'),
        ('process:rec-advance', 'prov:Plan'),
        ('chairs:2011OctDec/0004', 'trans:transreq'),
        ('email:2011Oct/0141', 'trans:pubreq'),
        ('email:2011Dec/0111', 'trans:pubreq'),
    ]
    for entity_id, entity_type in entity_table:
        doc.entity(entity_id, {'prov:type': entity_type})

    doc.wasDerivedFrom('tr:WD-prov-dm-20111215', 'tr:WD-prov-dm-20111018')

    activity_ids = ['ex:act1', 'ex:act2']
    for activity_id in activity_ids:
        doc.activity(activity_id, other_attributes={'prov:type': "publish"})

    for entity_id, activity_id in [
        ('tr:WD-prov-dm-20111018', 'ex:act1'),
        ('tr:WD-prov-dm-20111215', 'ex:act2'),
    ]:
        doc.wasGeneratedBy(entity_id, activity_id)

    for activity_id, entity_id in [
        ('ex:act1', 'chairs:2011OctDec/0004'),
        ('ex:act1', 'email:2011Oct/0141'),
        ('ex:act2', 'email:2011Dec/0111'),
    ]:
        doc.used(activity_id, entity_id)

    doc.agent('w3:Consortium', other_attributes={'prov:type': "Organization"})

    # Both activities were carried out by the consortium under the same plan.
    for activity_id in activity_ids:
        doc.wasAssociatedWith(activity_id, 'w3:Consortium', 'process:rec-advance')

    return doc
Beispiel #51
0
def w3c_publication_2():
    """Build the "W3C publication 2" example as a ``ProvDocument``.

    Transcribes the PROV-ASN bundle published at
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn
    which reads::

        bundle

        prefix ex <http://example.org/>
        prefix rec <http://example.org/record>

        prefix w3 <http://www.w3.org/TR/2011/>
        prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>

        entity(hg:Overview.html, [ prov:type="file in hg" ])
        entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])

        activity(ex:rcp,-,-,[prov:type="copy directory"])

        wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)

        entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])

        used(rec:u; ex:rcp,hg:Overview.html,-)
        used(ex:rcp, ex:req3, -)

        wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)

        agent(ex:webmaster, [ prov:type='prov:Person' ])

        wasAssociatedWith(ex:rcp, ex:webmaster, -)

        endBundle

    Returns:
        The populated ``ProvDocument``.
    """
    example_ns = Namespace('ex', 'http://example.org/')
    record_ns = Namespace('rec', 'http://example.org/record')
    w3_ns = Namespace('w3', 'http://www.w3.org/TR/2011/')
    hg_ns = Namespace('hg', 'http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/')

    doc = ProvDocument()

    # The repository file and the published draft.
    doc.entity(hg_ns['Overview.html'], {'prov:type': "file in hg"})
    doc.entity(w3_ns['WD-prov-dm-20111215'], {'prov:type': "html4"})

    # The copy activity, with no start or end time.
    copy_attributes = {'prov:type': "copy directory"}
    doc.activity(example_ns['rcp'], None, None, copy_attributes)

    # Named generation record (rec:g), referenced by the derivation below.
    doc.wasGeneratedBy(
        'w3:WD-prov-dm-20111215',
        'ex:rcp',
        identifier=record_ns['g'],
    )

    publication_request = Identifier("http://www.w3.org/2005/08/01-transitions.html#pubreq")
    doc.entity('ex:req3', {'prov:type': publication_request})

    # Named usage record (rec:u), also referenced by the derivation.
    doc.used('ex:rcp', 'hg:Overview.html', identifier='rec:u')
    doc.used('ex:rcp', 'ex:req3')

    doc.wasDerivedFrom(
        'w3:WD-prov-dm-20111215',
        'hg:Overview.html',
        'ex:rcp',
        'rec:g',
        'rec:u',
    )

    # NOTE(review): the transcribed statement gives the type as the
    # qualified name 'prov:Person'; a plain string is passed here - confirm
    # this is intended.
    doc.agent('ex:webmaster', {'prov:type': "Person"})

    doc.wasAssociatedWith('ex:rcp', 'ex:webmaster')

    return doc
Beispiel #52
0
class ProvenanceProfile:
    """
    Provenance profile.

    Populated as the workflow runs.
    """
    def __init__(
        self,
        research_object: "ResearchObject",
        full_name: str,
        host_provenance: bool,
        user_provenance: bool,
        orcid: str,
        fsaccess: StdFsAccess,
        run_uuid: Optional[uuid.UUID] = None,
    ) -> None:
        """
        Set up the profile state and create the initial PROV document.

        :param research_object: research object the provenance belongs to;
            its ``folder``, ``engine_uuid`` and ``add_to_manifest`` are
            copied onto this profile.
        :param full_name: creator name; logged at debug level when non-empty.
        :param host_provenance: flag consulted by ``generate_prov_doc``.
        :param user_provenance: flag consulted by ``generate_prov_doc``.
        :param orcid: creator ORCID; logged at debug level when non-empty.
        :param fsaccess: file access object kept for later file reads.
        :param run_uuid: identifier of this workflow run; a random UUID is
            minted when it is not given.
        """
        # Creator details and what may be recorded about them.
        self.orcid = orcid
        self.full_name = full_name
        self.host_provenance = host_provenance
        self.user_provenance = user_provenance

        # Storage back-ends and the document that is being populated.
        self.fsaccess = fsaccess
        self.research_object = research_object
        self.folder = research_object.folder
        self.document = ProvDocument()
        self.engine_uuid = research_object.engine_uuid  # type: str
        self.add_to_manifest = research_object.add_to_manifest

        if orcid:
            _logger.debug("[provenance] Creator ORCID: %s", orcid)
        if full_name:
            _logger.debug("[provenance] Creator Full name: %s", full_name)

        # Identity of this run, as UUID object and as its urn:uuid: string.
        self.workflow_run_uuid = run_uuid if run_uuid else uuid.uuid4()
        self.workflow_run_uri = self.workflow_run_uuid.urn  # type: str

        # Must come last: it reads the attributes assigned above.
        self.generate_prov_doc()

    def __str__(self) -> str:
        """Represent this Provenvance profile as a string."""
        return "ProvenanceProfile <{}> in <{}>".format(
            self.workflow_run_uri,
            self.research_object,
        )

    def generate_prov_doc(self) -> Tuple[str, ProvDocument]:
        """
        Populate ``self.document`` with the namespaces and top-level records.

        In order, this registers the namespaces used by the profile (keeping
        the ``metadata``, ``provenance`` and ``wf`` ones on ``self``),
        declares the account agent, then either a person agent the account
        acted on behalf of (when an ORCID or full name is known) or the
        optional host/user provenance, and finally the workflow engine agent
        and the workflow run activity with their start/association records.

        :return: tuple of the workflow run URI and the populated document.
        """
        def host_provenance(document: ProvDocument) -> None:
            """Record host provenance."""
            document.add_namespace(CWLPROV)
            document.add_namespace(UUID)
            document.add_namespace(FOAF)

            hostname = getfqdn()
            # won't have a foaf:accountServiceHomepage for unix hosts, but
            # we can at least provide hostname
            document.agent(
                ACCOUNT_UUID,
                {
                    PROV_TYPE: FOAF["OnlineAccount"],
                    "prov:location": hostname,
                    CWLPROV["hostname"]: hostname,
                },
            )

        # Only the last whitespace-separated token of versionstring() is kept.
        self.cwltool_version = "cwltool %s" % versionstring().split()[-1]
        self.document.add_namespace("wfprov",
                                    "http://purl.org/wf4ever/wfprov#")
        # document.add_namespace('prov', 'http://www.w3.org/ns/prov#')
        self.document.add_namespace("wfdesc",
                                    "http://purl.org/wf4ever/wfdesc#")
        # TODO: Make this ontology. For now only has cwlprov:image
        self.document.add_namespace("cwlprov", "https://w3id.org/cwl/prov#")
        self.document.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")
        self.document.add_namespace("schema", "http://schema.org/")
        self.document.add_namespace("orcid", "https://orcid.org/")
        self.document.add_namespace("id", "urn:uuid:")
        # NOTE: Internet draft expired 2004-03-04 (!)
        #  https://tools.ietf.org/html/draft-thiemann-hash-urn-01
        # TODO: Change to nih:sha-256; hashes
        #  https://tools.ietf.org/html/rfc6920#section-7
        self.document.add_namespace("data", "urn:hash::sha1:")
        # Also needed for docker images
        self.document.add_namespace(SHA256, "nih:sha-256;")

        # info only, won't really be used by prov as sub-resources use /
        self.document.add_namespace("researchobject",
                                    self.research_object.base_uri)
        # annotations
        self.metadata_ns = self.document.add_namespace(
            "metadata", self.research_object.base_uri + METADATA + "/")
        # Pre-register provenance directory so we can refer to its files
        self.provenance_ns = self.document.add_namespace(
            "provenance",
            self.research_object.base_uri + posix_path(PROVENANCE) + "/")
        # Namespace for identifiers inside the packed workflow; kept on self
        # because other methods build role identifiers from it.
        ro_identifier_workflow = self.research_object.base_uri + "workflow/packed.cwl#"
        self.wf_ns = self.document.add_namespace("wf", ro_identifier_workflow)
        ro_identifier_input = (self.research_object.base_uri +
                               "workflow/primary-job.json#")
        self.document.add_namespace("input", ro_identifier_input)

        # More info about the account (e.g. username, fullname)
        # may or may not have been previously logged by user_provenance()
        # .. but we always know cwltool was launched (directly or indirectly)
        # by a user account, as cwltool is a command line tool
        account = self.document.agent(ACCOUNT_UUID)
        if self.orcid or self.full_name:
            # PROV_TYPE and the "prov:type" string are distinct dict keys,
            # so both type values are passed to the agent record.
            person = {PROV_TYPE: PROV["Person"], "prov:type": SCHEMA["Person"]}
            if self.full_name:
                person["prov:label"] = self.full_name
                person["foaf:name"] = self.full_name
                person["schema:name"] = self.full_name
            else:
                # TODO: Look up name from ORCID API?
                pass
            # Without an ORCID the person is identified by a random UUID URN.
            agent = self.document.agent(self.orcid or uuid.uuid4().urn, person)
            self.document.actedOnBehalfOf(account, agent)
        else:
            # Host/user provenance is only recorded when no creator
            # identity (ORCID or full name) was supplied.
            if self.host_provenance:
                host_provenance(self.document)
            if self.user_provenance:
                self.research_object.user_provenance(self.document)
        # The execution of cwltool
        wfengine = self.document.agent(
            self.engine_uuid,
            {
                PROV_TYPE: PROV["SoftwareAgent"],
                "prov:type": WFPROV["WorkflowEngine"],
                "prov:label": self.cwltool_version,
            },
        )
        # FIXME: This datetime will be a bit too delayed, we should
        # capture when cwltool.py earliest started?
        self.document.wasStartedBy(wfengine, None, account,
                                   datetime.datetime.now())
        # define workflow run level activity
        self.document.activity(
            self.workflow_run_uri,
            datetime.datetime.now(),
            None,
            {
                PROV_TYPE: WFPROV["WorkflowRun"],
                "prov:label": "Run of workflow/packed.cwl#main",
            },
        )
        # association between SoftwareAgent and WorkflowRun
        main_workflow = "wf:main"
        self.document.wasAssociatedWith(self.workflow_run_uri,
                                        self.engine_uuid, main_workflow)
        self.document.wasStartedBy(self.workflow_run_uri, None,
                                   self.engine_uuid, datetime.datetime.now())
        return (self.workflow_run_uri, self.document)

    def evaluate(
        self,
        process: Process,
        job: JobsType,
        job_order_object: CWLObjectType,
        research_obj: "ResearchObject",
    ) -> None:
        """Evaluate the nature of job."""
        if not hasattr(process, "steps"):
            # record provenance of independent commandline tool executions
            self.prospective_prov(job)
            customised_job = copy_job_order(job, job_order_object)
            self.used_artefacts(customised_job, self.workflow_run_uri)
            research_obj.create_job(customised_job)
        elif hasattr(job, "workflow"):
            # record provenance of workflow executions
            self.prospective_prov(job)
            customised_job = copy_job_order(job, job_order_object)
            self.used_artefacts(customised_job, self.workflow_run_uri)

    def record_process_start(
            self,
            process: Process,
            job: JobsType,
            process_run_id: Optional[str] = None) -> Optional[str]:
        if not hasattr(process, "steps"):
            process_run_id = self.workflow_run_uri
        elif not hasattr(job, "workflow"):
            # commandline tool execution as part of workflow
            name = ""
            if isinstance(job, (CommandLineJob, JobBase, WorkflowJob)):
                name = job.name
            process_name = urllib.parse.quote(name, safe=":/,#")
            process_run_id = self.start_process(process_name,
                                                datetime.datetime.now())
        return process_run_id

    def start_process(
        self,
        process_name: str,
        when: datetime.datetime,
        process_run_id: Optional[str] = None,
    ) -> str:
        """
        Declare a process run activity and how it was started.

        :param process_name: name appended to the ``wf:main/`` plan and to
            the activity label.
        :param when: start time recorded on the wasStartedBy relation.
        :param process_run_id: identifier of the activity; a random UUID URN
            is minted when omitted.
        :return: the identifier of the declared activity.
        """
        run_id = uuid.uuid4().urn if process_run_id is None else process_run_id
        attributes = {
            PROV_TYPE: WFPROV["ProcessRun"],
            PROV_LABEL: "Run of workflow/packed.cwl#main/" + process_name,
        }
        self.document.activity(run_id, None, None, attributes)

        # The engine ran this step according to the step's plan.
        plan = "wf:main/" + process_name
        self.document.wasAssociatedWith(run_id, self.engine_uuid, plan)

        # The step was started by the enclosing workflow run.
        self.document.wasStartedBy(run_id, None, self.workflow_run_uri, when,
                                   None, None)
        return run_id

    def record_process_end(
        self,
        process_name: str,
        process_run_id: str,
        outputs: Union[CWLObjectType, MutableSequence[CWLObjectType], None],
        when: datetime.datetime,
    ) -> None:
        self.generate_output_prov(outputs, process_run_id, process_name)
        self.document.wasEndedBy(process_run_id, None, self.workflow_run_uri,
                                 when)

    def declare_file(
            self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, str]:
        """
        Declare a CWL ``File`` value as PROV entities.

        The content entity is resolved by trying, in order: a SHA1
        ``checksum`` the research object already holds, the ``location``
        (whose content is added to the research object), and finally the
        inline ``contents``. A second, specialized entity identified by
        ``value["@id"]`` carries the basename/nameroot/nameext, and any
        ``secondaryFiles`` are declared recursively and linked by derivation.

        Side effects on ``value``: ``@id`` is set when missing, and
        ``checksum`` is set when the file had to be read from ``location``.

        :return: tuple of the specialized file entity, the content entity
            and the checksum.
        :raises ValueError: if ``value`` is not of class ``File``, if none of
            checksum/location/contents yields an entity, or if a secondary
            file has an unexpected class.
        """
        if value["class"] != "File":
            raise ValueError("Must have class:File: %s" % value)
        # Need to determine file hash aka RO filename
        entity = None  # type: Optional[ProvEntity]
        checksum = None
        if "checksum" in value:
            csum = cast(str, value["checksum"])
            # Checksums are written as "<method>$<hex digest>".
            (method, checksum) = csum.split("$", 1)
            if method == SHA1 and self.research_object.has_data_file(checksum):
                entity = self.document.entity("data:" + checksum)

        if not entity and "location" in value:
            location = str(value["location"])
            # If we made it here, we'll have to add it to the RO
            with self.fsaccess.open(location, "rb") as fhandle:
                relative_path = self.research_object.add_data_file(fhandle)
                # FIXME: This naively relies on add_data_file setting hash as filename
                checksum = PurePath(relative_path).name
                entity = self.document.entity("data:" + checksum,
                                              {PROV_TYPE: WFPROV["Artifact"]})
                if "checksum" not in value:
                    value["checksum"] = f"{SHA1}${checksum}"

        if not entity and "contents" in value:
            # Anonymous file, add content as string
            entity, checksum = self.declare_string(cast(
                str, value["contents"]))

        # By here one of them should have worked!
        if not entity or not checksum:
            raise ValueError(
                "class:File but missing checksum/location/content: %r" % value)

        # Track filename and extension, this is generally useful only for
        # secondaryFiles. Note that multiple uses of a file might thus record
        # different names for the same entity, so we'll
        # make/track a specialized entity by UUID
        file_id = value.setdefault("@id", uuid.uuid4().urn)
        # A specialized entity that has just these names
        file_entity = self.document.entity(
            file_id,
            [(PROV_TYPE, WFPROV["Artifact"]), (PROV_TYPE, WF4EVER["File"])],
        )  # type: ProvEntity

        if "basename" in value:
            file_entity.add_attributes(
                {CWLPROV["basename"]: value["basename"]})
        if "nameroot" in value:
            file_entity.add_attributes(
                {CWLPROV["nameroot"]: value["nameroot"]})
        if "nameext" in value:
            file_entity.add_attributes({CWLPROV["nameext"]: value["nameext"]})
        # Link the named entity back to the content-addressed one.
        self.document.specializationOf(file_entity, entity)

        # Check for secondaries
        for sec in cast(MutableSequence[CWLObjectType],
                        value.get("secondaryFiles", [])):
            # TODO: Record these in a specializationOf entity with UUID?
            if sec["class"] == "File":
                (sec_entity, _, _) = self.declare_file(sec)
            elif sec["class"] == "Directory":
                sec_entity = self.declare_directory(sec)
            else:
                raise ValueError(f"Got unexpected secondaryFiles value: {sec}")
            # We don't know how/when/where the secondary file was generated,
            # but CWL convention is a kind of summary/index derived
            # from the original file. As it's generally in a different format,
            # prov:Quotation is not appropriate.
            self.document.derivation(
                sec_entity,
                file_entity,
                other_attributes={PROV["type"]: CWLPROV["SecondaryFile"]},
            )

        return file_entity, entity, checksum

    def declare_directory(self, value: CWLObjectType) -> ProvEntity:
        """
        Declare a CWL ``Directory`` value and everything listed in it.

        The directory becomes a collection entity in the main document and a
        folder/aggregation entity in a dedicated bundle. Each ``listing``
        entry is declared recursively and attached both as a PROV-O style
        dictionary member and as an ORE proxy. The bundle is then flattened,
        written as Turtle into the metadata folder and registered as an
        annotation of the directory.

        Side effects on ``value``: ``@id`` is set when missing, and
        ``get_listing`` is called on it when it has no ``listing`` key.

        :return: the collection entity declared in the main document.
        """
        # FIXME: Calculate a hash-like identifier for directory
        # so we get same value if it's the same filenames/hashes
        # in a different location.
        # For now, mint a new UUID to identify this directory, but
        # attempt to keep it inside the value dictionary
        dir_id = cast(str, value.setdefault("@id", uuid.uuid4().urn))

        # New annotation file to keep the ORE Folder listing
        ore_doc_fn = dir_id.replace("urn:uuid:", "directory-") + ".ttl"
        dir_bundle = self.document.bundle(self.metadata_ns[ore_doc_fn])

        coll = self.document.entity(
            dir_id,
            [
                (PROV_TYPE, WFPROV["Artifact"]),
                (PROV_TYPE, PROV["Collection"]),
                (PROV_TYPE, PROV["Dictionary"]),
                (PROV_TYPE, RO["Folder"]),
            ],
        )
        # ORE description of ro:Folder, saved separately
        coll_b = dir_bundle.entity(
            dir_id,
            [(PROV_TYPE, RO["Folder"]), (PROV_TYPE, ORE["Aggregation"])],
        )
        self.document.mentionOf(dir_id + "#ore", dir_id, dir_bundle.identifier)

        # dir_manifest = dir_bundle.entity(
        #     dir_bundle.identifier, {PROV["type"]: ORE["ResourceMap"],
        #                             ORE["describes"]: coll_b.identifier})

        # Attributes are collected while iterating and attached in one go
        # after the loop: coll_attribs for the main document entity,
        # coll_b_attribs for its counterpart inside the bundle.
        coll_attribs = [(ORE["isDescribedBy"], dir_bundle.identifier)]
        coll_b_attribs = []  # type: List[Tuple[Identifier, ProvEntity]]

        # FIXME: .listing might not be populated yet - hopefully
        # a later call to this method will sort that
        is_empty = True

        if "listing" not in value:
            get_listing(self.fsaccess, value)
        for entry in cast(MutableSequence[CWLObjectType],
                          value.get("listing", [])):
            is_empty = False
            # Declare child-artifacts
            entity = self.declare_artefact(entry)
            self.document.membership(coll, entity)
            # Membership relation aka our ORE Proxy
            # The same identifier is declared twice: once in the main
            # document (m_entity) and once in the bundle (m_b).
            m_id = uuid.uuid4().urn
            m_entity = self.document.entity(m_id)
            m_b = dir_bundle.entity(m_id)

            # PROV-O style Dictionary
            # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
            # ..as prov.py does not currently allow PROV-N extensions
            # like hadDictionaryMember(..)
            m_entity.add_asserted_type(PROV["KeyEntityPair"])

            m_entity.add_attributes({
                PROV["pairKey"]: entry["basename"],
                PROV["pairEntity"]: entity,
            })

            # As well as being a
            # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
            m_b.add_asserted_type(RO["FolderEntry"])
            m_b.add_asserted_type(ORE["Proxy"])
            m_b.add_attributes({
                RO["entryName"]: entry["basename"],
                ORE["proxyIn"]: coll,
                ORE["proxyFor"]: entity,
            })
            coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
            coll_b_attribs.append((ORE["aggregates"], m_b))

        coll.add_attributes(coll_attribs)
        coll_b.add_attributes(coll_b_attribs)

        # Also Save ORE Folder as annotation metadata
        ore_doc = ProvDocument()
        ore_doc.add_namespace(ORE)
        ore_doc.add_namespace(RO)
        ore_doc.add_namespace(UUID)
        ore_doc.add_bundle(dir_bundle)
        # Only the flattened copy is serialized below.
        ore_doc = ore_doc.flattened()
        ore_doc_path = str(PurePosixPath(METADATA, ore_doc_fn))
        with self.research_object.write_bag_file(
                ore_doc_path) as provenance_file:
            ore_doc.serialize(provenance_file,
                              format="rdf",
                              rdf_format="turtle")
        self.research_object.add_annotation(dir_id, [ore_doc_fn],
                                            ORE["isDescribedBy"].uri)

        if is_empty:
            # Empty directory
            coll.add_asserted_type(PROV["EmptyCollection"])
            coll.add_asserted_type(PROV["EmptyDictionary"])
        self.research_object.add_uri(coll.identifier.uri)
        return coll

    def declare_string(self, value: str) -> Tuple[ProvEntity, str]:
        """
        Store ``value`` as a plain-text data file and declare it as an entity.

        :return: tuple of the declared entity and the stored file's name,
            which callers use as the checksum.
        """
        text = str(value)
        stored_path = PurePosixPath(
            self.research_object.add_data_file(
                BytesIO(text.encode(ENCODING)), content_type=TEXT_PLAIN))
        # FIXME: Don't naively assume add_data_file uses hash in filename!
        attributes = {
            PROV_TYPE: WFPROV["Artifact"],
            PROV_VALUE: text,
        }
        entity = self.document.entity("data:%s" % stored_path.stem,
                                      attributes)  # type: ProvEntity
        return entity, stored_path.name

    def declare_artefact(self, value: Optional[CWLOutputType]) -> ProvEntity:
        """Create data artefact entities for all file objects."""
        if value is None:
            # FIXME: If this can happen in CWL, we'll
            # need a better way to represent this in PROV
            return self.document.entity(CWLPROV["None"], {PROV_LABEL: "None"})

        if isinstance(value, (bool, int, float)):
            # Typically used in job documents for flags

            # FIXME: Make consistent hash URIs for these
            # that somehow include the type
            # (so "1" != 1 != "1.0" != true)
            entity = self.document.entity(uuid.uuid4().urn,
                                          {PROV_VALUE: value})
            self.research_object.add_uri(entity.identifier.uri)
            return entity

        if isinstance(value, (str, str)):
            (entity, _) = self.declare_string(value)
            return entity

        if isinstance(value, bytes):
            # If we got here then we must be in Python 3
            byte_s = BytesIO(value)
            data_file = self.research_object.add_data_file(byte_s)
            # FIXME: Don't naively assume add_data_file uses hash in filename!
            data_id = "data:%s" % PurePosixPath(data_file).stem
            return self.document.entity(
                data_id,
                {
                    PROV_TYPE: WFPROV["Artifact"],
                    PROV_VALUE: str(value)
                },
            )

        if isinstance(value, MutableMapping):
            if "@id" in value:
                # Already processed this value, but it might not be in this PROV
                entities = self.document.get_record(value["@id"])
                if entities:
                    return entities[0]
                # else, unknown in PROV, re-add below as if it's fresh

            # Base case - we found a File we need to update
            if value.get("class") == "File":
                (entity, _, _) = self.declare_file(value)
                value["@id"] = entity.identifier.uri
                return entity

            if value.get("class") == "Directory":
                entity = self.declare_directory(value)
                value["@id"] = entity.identifier.uri
                return entity
            coll_id = value.setdefault("@id", uuid.uuid4().urn)
            # some other kind of dictionary?
            # TODO: also Save as JSON
            coll = self.document.entity(
                coll_id,
                [
                    (PROV_TYPE, WFPROV["Artifact"]),
                    (PROV_TYPE, PROV["Collection"]),
                    (PROV_TYPE, PROV["Dictionary"]),
                ],
            )

            if value.get("class"):
                _logger.warning("Unknown data class %s.", value["class"])
                # FIXME: The class might be "http://example.com/somethingelse"
                coll.add_asserted_type(CWLPROV[value["class"]])

            # Let's iterate and recurse
            coll_attribs = []  # type: List[Tuple[Identifier, ProvEntity]]
            for (key, val) in value.items():
                v_ent = self.declare_artefact(val)
                self.document.membership(coll, v_ent)
                m_entity = self.document.entity(uuid.uuid4().urn)
                # Note: only support PROV-O style dictionary
                # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
                # as prov.py do not easily allow PROV-N extensions
                m_entity.add_asserted_type(PROV["KeyEntityPair"])
                m_entity.add_attributes({
                    PROV["pairKey"]: str(key),
                    PROV["pairEntity"]: v_ent
                })
                coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
            coll.add_attributes(coll_attribs)
            self.research_object.add_uri(coll.identifier.uri)
            return coll

        # some other kind of Collection?
        # TODO: also save as JSON
        try:
            members = []
            for each_input_obj in iter(value):
                # Recurse and register any nested objects
                e = self.declare_artefact(each_input_obj)
                members.append(e)

            # If we reached this, then we were allowed to iterate
            coll = self.document.entity(
                uuid.uuid4().urn,
                [
                    (PROV_TYPE, WFPROV["Artifact"]),
                    (PROV_TYPE, PROV["Collection"]),
                ],
            )
            if not members:
                coll.add_asserted_type(PROV["EmptyCollection"])
            else:
                for member in members:
                    # FIXME: This won't preserve order, for that
                    # we would need to use PROV.Dictionary
                    # with numeric keys
                    self.document.membership(coll, member)
            self.research_object.add_uri(coll.identifier.uri)
            # FIXME: list value does not support adding "@id"
            return coll
        except TypeError:
            _logger.warning("Unrecognized type %s of %r", type(value), value)
            # Let's just fall back to Python repr()
            entity = self.document.entity(uuid.uuid4().urn,
                                          {PROV_LABEL: repr(value)})
            self.research_object.add_uri(entity.identifier.uri)
            return entity

    def used_artefacts(
        self,
        job_order: Union[CWLObjectType, List[CWLObjectType]],
        process_run_id: str,
        name: Optional[str] = None,
    ) -> None:
        """Add used() for each data artefact."""
        if isinstance(job_order, list):
            for entry in job_order:
                self.used_artefacts(entry, process_run_id, name)
        else:
            # FIXME: Use workflow name in packed.cwl, "main" is wrong for nested workflows
            base = "main"
            if name is not None:
                base += "/" + name
            for key, value in job_order.items():
                prov_role = self.wf_ns[f"{base}/{key}"]
                try:
                    entity = self.declare_artefact(value)
                    self.document.used(
                        process_run_id,
                        entity,
                        datetime.datetime.now(),
                        None,
                        {"prov:role": prov_role},
                    )
                except OSError:
                    pass

    def generate_output_prov(
        self,
        final_output: Union[CWLObjectType, MutableSequence[CWLObjectType],
                            None],
        process_run_id: Optional[str],
        name: Optional[str],
    ) -> None:
        """Call wasGeneratedBy() for each output,copy the files into the RO."""
        if isinstance(final_output, MutableSequence):
            for entry in final_output:
                self.generate_output_prov(entry, process_run_id, name)
        elif final_output is not None:
            # Timestamp should be created at the earliest
            timestamp = datetime.datetime.now()

            # For each output, find/register the corresponding
            # entity (UUID) and document it as generated in
            # a role corresponding to the output
            for output, value in final_output.items():
                entity = self.declare_artefact(value)
                if name is not None:
                    name = urllib.parse.quote(str(name), safe=":/,#")
                    # FIXME: Probably not "main" in nested workflows
                    role = self.wf_ns[f"main/{name}/{output}"]
                else:
                    role = self.wf_ns["main/%s" % output]

                if not process_run_id:
                    process_run_id = self.workflow_run_uri

                self.document.wasGeneratedBy(entity, process_run_id, timestamp,
                                             None, {"prov:role": role})

    def prospective_prov(self, job: JobsType) -> None:
        """
        Declare the plan (``wf:main``) that the run follows.

        For anything but a ``WorkflowJob`` the plan is a single wfdesc
        Process. For a ``WorkflowJob`` the plan is a wfdesc Workflow and
        every step is declared as a sub-process plan of it.
        """
        is_workflow = isinstance(job, WorkflowJob)
        # direct command line tool execution is described as a Process
        plan_type = WFDESC["Workflow"] if is_workflow else WFDESC["Process"]
        self.document.entity(
            "wf:main",
            {
                PROV_TYPE: plan_type,
                "prov:type": PROV["Plan"],
                "prov:label": "Prospective provenance",
            },
        )
        if not is_workflow:
            return

        for step in job.steps:
            # The first five characters of the step name are dropped
            # (presumably a fixed "step " prefix; confirm against WorkflowJob).
            step_id = urllib.parse.quote("wf:main/" + str(step.name)[5:],
                                         safe=":/,#")
            step_plan = self.document.entity(
                step_id,
                {
                    PROV_TYPE: WFDESC["Process"],
                    "prov:type": PROV["Plan"],
                },
            )
            # Attach the step to the main plan.
            self.document.entity(
                "wf:main",
                {
                    "wfdesc:hasSubProcess": step_plan,
                    "prov:label": "Prospective provenance",
                },
            )
        # TODO: Declare roles/parameters as well

    def activity_has_provenance(self, activity, prov_ids):
        # type: (str, List[Identifier]) -> None
        """
        Link ``activity`` to the provenance files that describe it.

        Adds one http://www.w3.org/TR/prov-aq/ ``has_provenance`` attribute
        per identifier to the activity and registers the same links as an
        annotation in the research object.
        """
        relation = PROV["has_provenance"]
        # NOTE: The below will only work if the corresponding metadata/provenance arcp URI
        # is a pre-registered namespace in the PROV Document
        self.document.activity(
            activity,
            other_attributes=[(relation, prov_id) for prov_id in prov_ids],
        )
        # Tip: we can't use https://www.w3.org/TR/prov-links/#term-mention
        # as prov:mentionOf() is only for entities, not activities
        self.research_object.add_annotation(
            activity,
            [prov_id.uri for prov_id in prov_ids],
            relation.uri,
        )

    def finalize_prov_profile(self, name):
        # type: (Optional[str]) -> List[Identifier]
        """
        Serialize the PROV document into the research object.

        The document is written once per supported format (PROV-XML, PROV-N,
        PROV-JSON, and the PROV-O RDF serializations Turtle, N-Triples and
        JSON-LD) under the provenance folder.

        :param name: workflow/step name used to build the file name, or
            ``None`` for the main workflow (fixed file name).
        :return: identifiers of the written provenance files, in the order
            they were written.
        """
        # NOTE: Relative posix path
        if name is None:
            # main workflow, fixed filenames
            filename = "primary.cwlprov"
        else:
            # ASCII-friendly filename, avoiding % as we don't want %2520 in manifest.json
            # Note that this could cause overlaps for similarly named
            # workflows, but that's OK as we'll also include run uuid
            # which also covers the case of this step being run in
            # multiple places or iterations
            wf_name = urllib.parse.quote(str(name), safe="").replace("%", "_")
            filename = f"{wf_name}.{self.workflow_run_uuid}.cwlprov"

        basename = str(PurePosixPath(PROVENANCE) / filename)

        # TODO: Also support other profiles than CWLProv, e.g. ProvOne

        # File extension -> keyword arguments for ProvDocument.serialize()
        serializations = (
            # https://www.w3.org/TR/prov-xml/
            (".xml", {"format": "xml", "indent": 4}),
            # https://www.w3.org/TR/prov-n/
            (".provn", {"format": "provn", "indent": 2}),
            # https://www.w3.org/Submission/prov-json/
            (".json", {"format": "json", "indent": 2}),
            # "rdf" aka https://www.w3.org/TR/prov-o/
            # which can be serialized to ttl/nt/jsonld (and more!)
            # https://www.w3.org/TR/turtle/
            (".ttl", {"format": "rdf", "rdf_format": "turtle"}),
            # https://www.w3.org/TR/n-triples/
            (".nt", {"format": "rdf", "rdf_format": "ntriples"}),
            # https://www.w3.org/TR/json-ld/
            # TODO: Use a nice JSON-LD context
            # see also https://eprints.soton.ac.uk/395985/
            # 404 Not Found on https://provenance.ecs.soton.ac.uk/prov.jsonld :(
            (".jsonld", {"format": "rdf", "rdf_format": "json-ld"}),
        )

        # list of prov identifiers of provenance files
        prov_ids = []
        for extension, options in serializations:
            with self.research_object.write_bag_file(
                    basename + extension) as provenance_file:
                self.document.serialize(provenance_file, **options)
                prov_ids.append(self.provenance_ns[filename + extension])

        _logger.debug("[provenance] added provenance: %s", prov_ids)
        return prov_ids
Beispiel #53
0
    def write_targets_prov(self, tlist, C, bundle_id):
        """Build a PROV bundle for the current target update and optionally upload it.

        A fresh ``ProvDocument`` is populated with the CrowdScanner agent, one
        update activity, a versioned entity for every changed target in
        ``tlist`` and an entity for each report that has not been recorded in
        ``self.postedreports`` yet. When the module-level ``uploadprov`` flag
        is truthy, stale targets are invalidated and the bundle is posted
        through ``self.api``.

        Args:
            tlist: Iterable of target records, indexed positionally. Fields
                read here: 0 (target id, cast to int), 1 and 2 (stored as
                ``ao:longitude`` / ``ao:latitude``), 4 (version, cast to int),
                5 (per-report text, decoded as UTF-8) and 7 (per-report
                agent ids). Position ``i`` in ``tlist`` must line up with
                ``self.changedtargets[i]`` and ``self.target_rep_ids[i]``.
            C: Report table indexed as ``C[r, :]`` by report id; elements 1
                and 2 of the row are stored as the report's
                ``ao:longitude`` / ``ao:latitude``.
            bundle_id: Not read in the code visible here; it is overwritten
                with a timestamp-based id before the upload.

        Side effects:
            Updates ``self.targets``, ``self.targetversions`` and
            ``self.postedreports``. On the first upload (``self.document_id
            == -1``) it also creates the remote document, appends its id to
            ``self.provfilelist``, calls ``self.savelocalrecord()`` and
            stores the id in ``self.document_id``.
        """
        # Initialisation: make sure a parent document exists on the server.

        if self.document_id == -1:
            d = ProvDocument()
            d.add_namespace(AO)
            d.set_default_namespace(self.defaultns % self.game_id)
            # NOTE(review): ``d`` is only consumed inside this branch; when
            # uploadprov is falsy it is built and then discarded.
            if uploadprov:
                provstore_document = self.api.document.create(d, name="Operation%s CrowdScanner" % self.game_id, public=True)
                document_uri = provstore_document.url
                logging.info("prov doc URI: " + str(document_uri))
                self.provfilelist.append(provstore_document.id)
                self.savelocalrecord()
                self.document_id = provstore_document.id

        b = ProvDocument()  # Create a new document for this update
        b.add_namespace(AO)
        b.set_default_namespace(self.defaultns % self.game_id)

        # cs to be used with all targets
        cs = b.agent('agent/CrowdScanner', (('prov:type', AO['IBCCAlgo']), ('prov:type', PROV['SoftwareAgent'])))

        timestamp = time.time()  # Record the timestamp at each update to generate unique identifiers
        startTime = datetime.datetime.fromtimestamp(timestamp)
        # The activity is recorded with identical start and end times.
        endTime = startTime
        activity = b.activity('activity/cs/update_report_%s' % timestamp, startTime, endTime)
        activity.wasAssociatedWith(cs)

        # Add target and report entities
        for i, tdata in enumerate(tlist):
            # Targets flagged 0 in self.changedtargets are left out of this bundle.
            if self.changedtargets[i]==0:
                continue

            # Target entity for target i
            tid = int(tdata[0])
            x = tdata[1]
            y = tdata[2]
            # targettype = tdata[3]  # don't record here, it will be revealed and recorded by UAVs
            v = int(tdata[4])
            agentids = tdata[7]

            targetattributes = {'ao:longitude': x, 'ao:latitude': y, }
            #'ao:asset_type':str(targettype)}
            # Versioned entity, identified as cs/target/<tid>.<version>.
            target_v0 = b.entity('cs/target/'+str(tid)+'.'+str(v), targetattributes)
            # First version: also create the unversioned root entity for this target.
            if v==0:
                self.targets[tid] = b.entity('cs/target/'+str(tid))
            else:
                # Later versions are derived from the previously stored version.
                try:
                    target_v0.wasDerivedFrom(self.targetversions[tid])
                except KeyError:
                    # Best effort: log and carry on without the derivation link.
                    logging.error("Got a key error for key " + str(tid) + ', which is supposed to be version' + str(v))
            self.targetversions[tid] = target_v0
            # NOTE(review): this lookup is outside the try above; if tid has no
            # root entity in self.targets (v != 0 without a recorded v == 0),
            # it presumably raises KeyError - confirm whether that can happen.
            target_v0.specializationOf(self.targets[tid])
            target_v0.wasAttributedTo(cs)

            # Report entities for origins of target i
            for j, r in enumerate(self.target_rep_ids[i]):
                # Create the report entity only once; later updates reuse the
                # entity stored in self.postedreports.
                if r not in self.postedreports:
                    Crow = C[r,:]
                    x = Crow[1]
                    y = Crow[2]
                    reptext = tdata[5][j].decode('utf8')
                    # Try to replace unusual characters
                    # NOTE(review): encode() leaves a byte string; on Python 3
                    # 'ao:report' would hold bytes rather than str - confirm
                    # the intended interpreter version.
                    reptext = reptext.encode('ascii', 'replace')
                    agentid = agentids[j]

                    reporter_name = 'agent/crowdreporter%s' % agentid
                    b.agent(reporter_name, (('prov:type', AO['CrowdReporter']), ('prov:type', PROV['Person'])))

                    reportattributes = {'ao:longitude': x, 'ao:latitude': y, 'ao:report': reptext}

                    self.postedreports[r] = b.entity('cs/report/'+str(r), reportattributes)
                    self.postedreports[r].wasAttributedTo(reporter_name)
                # Every report of the target, new or already posted, is linked
                # to this update activity and to the new target version.
                activity.used(self.postedreports[r])
                target_v0.wasDerivedFrom(self.postedreports[r])

        if uploadprov:
            # Invalidate old targets no longer in use
            for i,tid in enumerate(self.targets_to_invalidate):
                target_v = self.targetversions[tid]
                b.wasInvalidatedBy(target_v, activity)
            # Post the document to the server
            #bundle = b.bundle('crowd_scanner')
            # The caller-supplied bundle_id is replaced by a timestamp-based id.
            bundle_id = 'bundle/csupdate/%s' % timestamp
            self.api.add_bundle(self.document_id, b.serialize(), bundle_id)