Beispiel #1
0
def add_commit(graph: ProvDocument, package: CommitModelPackage) -> ProvDocument:
    """Record a commit in *graph* and hand the same graph back.

    ``package.author`` and ``package.committer`` are each declared as an
    agent, ``package.commit`` as an activity, and the commit activity is then
    associated with both agents. Every package member is unpacked
    positionally into the matching ``graph`` call and must expose ``.id``.
    """
    agents = (package.author, package.committer)
    commit = package.commit

    # Declare the nodes first: both agents, then the activity.
    for agent in agents:
        graph.agent(*agent)
    graph.activity(*commit)

    # Then link the activity to each agent, author before committer.
    for agent in agents:
        graph.wasAssociatedWith(commit.id, agent.id)
    return graph
def job2prov(job):
    """
    Create ProvDocument based on job description

    The job is recorded as one activity (``ctajobs:<jobname>``, bounded by
    ``job.start_time`` / ``job.end_time``) associated with the
    ``cta:consortium`` agent. Every parameter whose type mentions ``file`` or
    ``xs:anyURI`` becomes an input entity used by the activity; every result
    becomes an output entity generated by the activity and derived from all
    input entities.

    :param job: UWS job; must provide ``jobname``, ``jobid``, ``start_time``,
        ``end_time`` and ``jdl.content`` with ``'parameters'`` and
        ``'results'`` mappings
    :return: ProvDocument
    """

    # Layout of the job description read below:
    # job.jdl.content = {
    #     'description': description,
    #     'parameters': parameters,
    #     'results': results,
    #     'executionduration': execdur,
    #     'quote': quote
    # }
    # parameters[pname] = {
    #     'type': p.get('type'),
    #     'required': p.get('required'),
    #     'default': p.get('default'),
    #     'description': list(p)[0].text,
    # }
    # results[r.get('value')] = {
    #     'mediaType': r.get('mediaType'),
    #     'default': r.get('default'),
    #     'description': list(r)[0].text,
    # }

    pdoc = ProvDocument()
    # Declaring namespaces for various prefixes used in the example
    pdoc.add_namespace('prov', 'http://www.w3.org/ns/prov#')
    pdoc.add_namespace('voprov', 'http://www.ivoa.net/ns/voprov#')
    pdoc.add_namespace('cta', 'http://www.cta-observatory.org#')
    pdoc.add_namespace('uwsdata', 'https://voparis-uws-test.obspm.fr/rest/' + job.jobname + '/' + job.jobid + '/')
    pdoc.add_namespace('ctajobs', 'http://www.cta-observatory.org#')
    # Adding an activity
    ctbin = pdoc.activity('ctajobs:' + job.jobname, job.start_time, job.end_time)
    # TODO: add job description, version, url, ...
    # Agent
    pdoc.agent('cta:consortium', other_attributes={'prov:type': "Organization"})
    pdoc.wasAssociatedWith(ctbin, 'cta:consortium')
    # Entities, in and out with relations
    file_type_markers = ('file', 'xs:anyURI')
    e_in = []
    # .items() works on Python 2 and 3; .iteritems() only exists on Python 2.
    for pname, pdict in job.jdl.content['parameters'].items():
        # The type is stored via p.get('type') and may therefore be None.
        ptype = pdict.get('type') or ''
        if any(marker in ptype for marker in file_type_markers):
            entity = pdoc.entity('uwsdata:parameters/' + pname)
            # TODO: use publisher_did? add prov attributes, add voprov attributes?
            ctbin.used(entity)
            e_in.append(entity)
    # Only the result names are needed; the per-result dict is not read.
    for rname in job.jdl.content['results']:
        entity = pdoc.entity('uwsdata:results/' + rname)
        # TODO: use publisher_did? add prov attributes, add voprov attributes?
        entity.wasGeneratedBy(ctbin)
        for source in e_in:
            entity.wasDerivedFrom(source)
    return pdoc
Beispiel #3
0
def ctfToProv():
    """Build a PROV document from the events of ``trace_collection``.

    ``trace_collection`` and ``babeltrace`` are read from the enclosing
    module. For the N-th event (counting from 0) the document receives:

    * an entity ``ex:event<N>`` carrying the event's EVENT_FIELDS values plus
      ``ex:timestamp`` (the event timestamp divided by 1e9),
    * agents for ``producer_id`` and ``controller_id``,
    * an activity ``ex:<activity><N>`` that generated the entity and is
      associated with the producer agent,
    * a ``used`` relation controller -> producer, emitted once per distinct
      pair,
    * a relation from the previous event's entity to this one.

    :return: the populated ProvDocument
    """
    d1 = ProvDocument()
    # Scratch document: only used to render a relation as a string so that
    # already-emitted relations can be recognised; it is never returned.
    dummy = ProvDocument()
    ex = Namespace(
        'ex', 'http://example/'
    )  # namespaces do not need to be explicitly added to a document
    seen_relationships = set()
    previous_entity = None
    for index, event in enumerate(trace_collection.events):
        dataset = {
            'ex:' + k: event[k]
            for k in event.field_list_with_scope(
                babeltrace.CTFScope.EVENT_FIELDS)
        }
        dataset['ex:timestamp'] = event['timestamp'] / 1000000000

        e1 = d1.entity(ex['event' + str(index)], dataset)
        producer_agent = d1.agent('ex:' + event['producer_id'])
        controller_agent = d1.agent('ex:' + event['controller_id'])
        activity = d1.activity('ex:' + event['activity'] + str(index))
        d1.wasGeneratedBy(e1, activity)

        # Each activity is unique per event, so it is always associated.
        d1.wasAssociatedWith(activity, producer_agent)

        # Link controller and producer only once per distinct pair.
        used_relationship = str(dummy.used(controller_agent, producer_agent))
        if used_relationship not in seen_relationships:
            d1.used(controller_agent, producer_agent)
            seen_relationships.add(used_relationship)

        # Add temporal relationship between this event and the previous one.
        # NOTE(review): wasAssociatedWith is applied to two entities here;
        # confirm this is the intended relation for event ordering.
        if previous_entity is not None:
            d1.wasAssociatedWith(previous_entity, e1)
        previous_entity = e1
    return d1
Beispiel #4
0
def gen_prov_graph(file_path, option):
    '''
      Generate a PROV graph from a form JSON file.

      The JSON document must contain a 'workflow' list of two-element items
      whose first element names a workflow step. For every step name the
      document must hold a dict with the keys 'entity_in', 'entity_out',
      'agent' and 'activity', each providing an 'i_name'.

      file_path: path of the JSON file to read
      option = "all": add attributes to nodes
      returns: the populated ProvDocument
    '''
    # "with" closes the file even if reading or parsing fails.
    with open(file_path, "r") as form_file:
        sf_dict = json.load(form_file)

    d1 = ProvDocument()
    # NOTE(review): the URI spells "submsission"; left unchanged because it is
    # part of every generated identifier.
    d1.add_namespace('subm',
                     'http://www.enes.org/enes_entity/data_submsission')

    # Single entity that every workflow step both uses and generates.
    global_in_out = d1.entity("subm:" + "form_name_xx")

    print("workflow definition: ", sf_dict['workflow'])
    for act_name, _ in sf_dict['workflow']:

        print("adding entities for workflow_step: ", act_name)
        step = sf_dict[act_name]
        entity_in_dict = step['entity_in']
        entity_out_dict = step['entity_out']
        agent_dict = step['agent']
        activity_dict = step['activity']

        # generate nodes
        in_node = d1.entity("subm:" + entity_in_dict['i_name'])
        out_node = d1.entity("subm:" + entity_out_dict['i_name'])
        agent = d1.agent("subm:" + agent_dict['i_name'])
        activity = d1.activity("subm:" + activity_dict['i_name'])

        # clean up and prefix dictionaries (prefix_dict is defined elsewhere)
        entity_in_dict = prefix_dict(entity_in_dict, 'subm')
        entity_out_dict = prefix_dict(entity_out_dict, 'subm')
        agent_dict = prefix_dict(agent_dict, 'subm')
        activity_dict = prefix_dict(activity_dict, 'subm')

        if option == "all":
            in_node.add_attributes(entity_in_dict)
            out_node.add_attributes(entity_out_dict)
            agent.add_attributes(agent_dict)
            activity.add_attributes(activity_dict)

        # connect nodes in graph
        d1.wasGeneratedBy(out_node, activity)
        d1.used(activity, in_node)
        d1.wasAssociatedWith(activity, agent)
        # wasDerivedFrom takes (generated entity, used entity): the output of
        # the step is derived from its input, not the other way round.
        d1.wasDerivedFrom(out_node, in_node)
        d1.used(activity, global_in_out)
        d1.wasGeneratedBy(global_in_out, activity)

    return d1
Beispiel #5
0
def ctfToProv():
    """Build a PROV document from the events of ``trace_collection``.

    ``trace_collection`` and ``babeltrace`` are read from the enclosing
    module. For the N-th event (counting from 0) the document receives:

    * an entity ``ex:event<N>`` carrying the event's EVENT_FIELDS values plus
      ``ex:timestamp`` (the event timestamp divided by 1e9),
    * agents for ``producer_id`` and ``controller_id``,
    * a relation from the previous entity of the *same producer* to this one,
    * an activity ``ex:<activity><N>`` that generated the entity and is
      associated with the producer agent,
    * a ``used`` relation controller -> producer, emitted once per distinct
      pair.

    :return: the populated ProvDocument
    """
    d1 = ProvDocument()
    # Scratch document: only used to render a relation as a string so that
    # already-emitted relations can be recognised; it is never returned.
    dummy = ProvDocument()
    ex = Namespace('ex', 'http://example/')  # namespaces do not need to be explicitly added to a document
    seen_relationships = set()
    # producer_id -> most recent entity emitted by that producer
    last_entity_by_producer = {}
    for index, event in enumerate(trace_collection.events):
        dataset = {'ex:' + k: event[k] for k in event.field_list_with_scope(
            babeltrace.CTFScope.EVENT_FIELDS)}
        dataset['ex:timestamp'] = event['timestamp'] / 1000000000

        e1 = d1.entity(ex['event' + str(index)], dataset)
        producer_id = event['producer_id']
        producer_agent = d1.agent('ex:' + producer_id)

        # Chain this entity to the previous one from the same producer. The
        # first entity of a producer has no predecessor but must still be
        # remembered so that the next one can link back to it.
        # NOTE(review): wasAssociatedWith is applied to two entities here;
        # confirm this is the intended relation for event ordering.
        previous_entity = last_entity_by_producer.get(producer_id)
        if previous_entity is not None:
            d1.wasAssociatedWith(previous_entity, e1)
        last_entity_by_producer[producer_id] = e1

        controller_agent = d1.agent('ex:' + event['controller_id'])
        activity = d1.activity('ex:' + event['activity'] + str(index))
        d1.wasGeneratedBy(e1, activity)

        # Each activity is unique per event, so it is always associated.
        d1.wasAssociatedWith(activity, producer_agent)

        # Link controller and producer only once per distinct pair.
        used_relationship = str(dummy.used(controller_agent, producer_agent))
        if used_relationship not in seen_relationships:
            d1.used(controller_agent, producer_agent)
            seen_relationships.add(used_relationship)
    return d1
Beispiel #6
0
def add_resource_creation(graph: ProvDocument, package: ResourceModelPackage) -> ProvDocument:
    """Add the creation of a resource to *graph* and return the graph.

    ``package.creation`` is unpacked as ``(creator, creation, resource,
    resource_version)``. The creation activity, both entities and the creator
    agent are declared; the creator is associated with the activity; both
    entities are attributed to the creator and generated by the activity; and
    the resource version is marked as a specialization of the resource.
    """
    agent, activity, resource, version = package.creation
    entities = (resource, version)

    # Node declarations: activity, the two entities, then the agent.
    graph.activity(*activity)
    for entity in entities:
        graph.entity(*entity)
    graph.agent(*agent)

    # Relations, grouped by kind.
    graph.wasAssociatedWith(activity.id, agent.id)
    for entity in entities:
        graph.wasAttributedTo(entity.id, agent.id)
    for entity in entities:
        graph.wasGeneratedBy(entity.id, activity.id)
    graph.specializationOf(version.id, resource.id)
    return graph
Beispiel #7
0
def _create_trial_info(document: provo.ProvDocument, trial: Trial, suffix=""):
    """Describe one trial in *document*.

    Adds a software agent named after the trial's script (with every "."
    replaced by "_" and *suffix* appended), an activity
    ``trial<id>Execution`` spanning ``trial.start`` to ``trial.finish``, and
    an association between the two identified as
    ``trial<id>ExecutionByScript``.
    """
    # "." is the only character treated as invalid in an identifier here.
    agent_id = "{}{}".format(trial.script.replace(".", "_"), suffix)
    agent_attributes = [
        (provo.PROV_TYPE, provo.PROV["SoftwareAgent"]),
        ("codeHash", trial.code_hash),
        ("script", trial.script),
        ("id", trial.id),
    ]
    document.agent(agent_id, agent_attributes)

    activity_id = "trial{}Execution".format(trial.id)
    activity_attributes = [
        ("nowCommand", trial.command),
        ("parentId", trial.parent_id),
        ("inheritedId", trial.inherited_id),
    ]
    document.activity(activity_id, trial.start, trial.finish, activity_attributes)

    # Third positional argument is left as None; the fourth names the relation.
    association_id = "trial{}ExecutionByScript".format(trial.id)
    document.wasAssociatedWith(activity_id, agent_id, None, association_id)
Beispiel #8
0
def add_event_chain(graph: ProvDocument, package: ResourceModelPackage) -> ProvDocument:
    """Add the links of ``package.event_chain`` to *graph* and return the graph.

    Each link is a ``(user, event, resource, resource_version)`` tuple. For
    every link the nodes are declared, the user is associated with the event,
    and the resource version is attributed to the user and marked as a
    specialization of the resource. From the second link onwards the link is
    also tied to its predecessor: the new version is generated by the event
    and derived from the previous version, the event used the previous
    version, and the event was informed by the previous event.
    """
    # (event, resource_version) of the link handled in the previous iteration.
    predecessor = None
    for user, event, resource, resource_version in package.event_chain:
        for entity in (resource, resource_version):
            graph.entity(*entity)
        graph.activity(*event)
        graph.agent(*user)

        graph.wasAssociatedWith(event.id, user.id)
        graph.wasAttributedTo(resource_version.id, user.id)
        graph.specializationOf(resource_version.id, resource.id)

        if predecessor is not None:
            earlier_event, earlier_version = predecessor
            # Re-declare the predecessor's nodes before relating to them.
            graph.entity(*earlier_version)
            graph.activity(*earlier_event)
            graph.wasGeneratedBy(resource_version.id, event.id)
            graph.used(event.id, earlier_version.id)
            graph.wasDerivedFrom(resource_version.id, earlier_version.id)
            graph.wasInformedBy(event.id, earlier_event.id)

        predecessor = (event, resource_version)
    return graph
Beispiel #9
0
def release_tag_model(graph: ProvDocument, packages: ReleaseTagPackage):
    """Add releases, tags and their commits to *graph* and return the graph.

    Each item of *packages* may carry a ``release_package``, a
    ``tag_package`` and a ``commit_package``; any of them may be ``None`` and
    is then skipped. For each present part the user/author agent, the entity
    and the event activity are declared, and the entity is recorded as
    generated by the event, attributed to the agent, with the event
    associated with the agent. ``hadMember`` relations are added for
    asset/release, evidence/release, tag/release (only when a release is
    present) and commit/tag (only when a tag is present).
    """
    for package in packages:
        has_release = package.release_package is not None
        has_tag = package.tag_package is not None

        if has_release:
            publisher, release, release_event, evidence, assets = package.release_package
            graph.agent(*publisher)
            graph.entity(*release)
            graph.activity(*release_event)
            graph.entity(*evidence)
            for asset in assets:
                graph.entity(*asset)
                graph.hadMember(asset.id, release.id)

            graph.hadMember(evidence.id, release.id)
            graph.wasGeneratedBy(release.id, release_event.id)
            graph.wasAttributedTo(release.id, publisher.id)
            graph.wasAssociatedWith(release_event.id, publisher.id)

        if has_tag:
            tagger, tag, tag_event = package.tag_package
            graph.agent(*tagger)
            graph.entity(*tag)
            graph.activity(*tag_event)

            # ``release`` is only bound when this package has a release part.
            if has_release:
                graph.hadMember(tag.id, release.id)
            graph.wasGeneratedBy(tag.id, tag_event.id)
            graph.wasAttributedTo(tag.id, tagger.id)
            graph.wasAssociatedWith(tag_event.id, tagger.id)

        if package.commit_package is not None:
            # Only the 1st, 2nd and 4th members of the commit package are used.
            author, commit_event, _, commit, _ = package.commit_package
            graph.agent(*author)
            graph.activity(*commit_event)
            graph.entity(*commit)

            # ``tag`` is only bound when this package has a tag part.
            if has_tag:
                graph.hadMember(commit.id, tag.id)
            graph.wasGeneratedBy(commit.id, commit_event.id)
            graph.wasAttributedTo(commit.id, author.id)
            graph.wasAssociatedWith(commit_event.id, author.id)
    return graph
Beispiel #10
0
def ctfToProv():
    """Build a PROV document from ``trace_collection``, grouped by producer.

    ``trace_collection``, ``babeltrace`` and ``entityActivity`` are read from
    the enclosing module. First pass: the N-th event (counting from 0)
    becomes an entity ``ex:event<N>`` carrying its EVENT_FIELDS values plus
    ``ex:name``; the entity and the event's activity value are stored per
    ``producer_id``, and an agent is declared for ``controller_id``.

    Second pass, per producer: a producer agent is declared and linked to the
    controller agent of the *last* event seen; consecutive entities of the
    producer are chained; every entity is recorded as generated by its
    activity, and that activity is associated with the producer agent.

    :return: the populated ProvDocument
    """
    d1 = ProvDocument()
    # Scratch document: only used to render a relation as a string so that
    # already-emitted relations can be recognised; it is never returned.
    dummy = ProvDocument()
    ex = Namespace(
        'ex', 'http://example/'
    )  # namespaces do not need to be explicitly added to a document
    relationships = set()
    can_events = defaultdict(list)
    # enumerate gives each event its own number; a counter that is never
    # advanced would name every entity ex:event0.
    for counter, event in enumerate(trace_collection.events):
        dataset = {
            'ex:' + k: event[k]
            for k in event.field_list_with_scope(
                babeltrace.CTFScope.EVENT_FIELDS)
        }
        dataset['ex:name'] = event.name

        e1 = d1.entity(ex['event' + str(counter)], dataset)

        # Keep the entity together with its activity value for the second pass.
        entity_activity = entityActivity()
        entity_activity.addEntityActivity(e1, event['activity'])
        can_events[event['producer_id']].append(entity_activity)

        controller_agent = d1.agent('ex:' + event['controller_id'])

    for key, entityActivityList in can_events.items():
        producer_agent = d1.agent('ex:' + str(key))

        # NOTE(review): controller_agent is whatever the last event set it
        # to; confirm a single controller for all producers is intended.
        used_relationship = str(dummy.used(controller_agent, producer_agent))
        if used_relationship not in relationships:
            d1.used(controller_agent, producer_agent)
            relationships.add(used_relationship)

        last_index = len(entityActivityList) - 1
        for index, item in enumerate(entityActivityList):
            # Chain each entity to its successor; the last one has none.
            if index < last_index:
                d1.wasAssociatedWith(item.getEntity(),
                                     entityActivityList[index + 1].getEntity())
            # Generation and association apply to every event, including the
            # last, and take the entity itself rather than the wrapper.
            # NOTE(review): the activity is the raw event value, without an
            # 'ex:' prefix or a d1.activity() declaration - confirm it
            # resolves to a valid identifier.
            d1.wasGeneratedBy(item.getEntity(), item.getActivity())
            d1.wasAssociatedWith(item.getActivity(), producer_agent)

    return d1
def primer_example():
    """Build the PROV primer example as a ``ProvDocument``.

    Each call below is preceded by the PROV-N statement it reproduces from
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/primer.pn

    :return: the populated ProvDocument
    """
    # ===========================================================================
    # document
    g = ProvDocument()

    #    prefix ex <http://example/>
    #    prefix dcterms <http://purl.org/dc/terms/>
    #    prefix foaf <http://xmlns.com/foaf/0.1/>
    ex = Namespace(
        "ex", "http://example/"
    )  # namespaces do not need to be explicitly added to a document
    g.add_namespace("dcterms", "http://purl.org/dc/terms/")
    g.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    #    entity(ex:article, [dcterms:title="Crime rises in cities"])
    # first time the ex namespace was used, it is added to the document automatically
    g.entity(ex["article"], {"dcterms:title": "Crime rises in cities"})
    #    entity(ex:articleV1)
    #    entity(ex:articleV2)
    #    entity(ex:dataSet1)
    #    entity(ex:dataSet2)
    #    entity(ex:regionList)
    #    entity(ex:composition)
    #    entity(ex:chart1)
    #    entity(ex:chart2)
    #    entity(ex:blogEntry)
    for entity_name in (
        "articleV1",
        "articleV2",
        "dataSet1",
        "dataSet2",
        "regionList",
        "composition",
        "chart1",
        "chart2",
        "blogEntry",
    ):
        g.entity(ex[entity_name])

    #    activity(ex:compile)
    #    activity(ex:compile2)
    #    activity(ex:compose)
    # since ex is registered, it can be used like this
    for activity_name in ("ex:compile", "ex:compile2", "ex:compose"):
        g.activity(activity_name)
    #    activity(ex:correct, 2012-03-31T09:21:00, 2012-04-01T15:21:00)
    g.activity("ex:correct", "2012-03-31T09:21:00",
               "2012-04-01T15:21:00")  # date time can be provided as strings
    #    activity(ex:illustrate)
    g.activity("ex:illustrate")

    #    used(ex:compose, ex:dataSet1, -,   [ prov:role = "ex:dataToCompose"])
    g.used("ex:compose",
           "ex:dataSet1",
           other_attributes={"prov:role": "ex:dataToCompose"})
    #    used(ex:compose, ex:regionList, -, [ prov:role = "ex:regionsToAggregateBy"])
    g.used(
        "ex:compose",
        "ex:regionList",
        other_attributes={"prov:role": "ex:regionsToAggregateBy"},
    )
    #    wasGeneratedBy(ex:composition, ex:compose, -)
    g.wasGeneratedBy("ex:composition", "ex:compose")

    #    used(ex:illustrate, ex:composition, -)
    g.used("ex:illustrate", "ex:composition")
    #    wasGeneratedBy(ex:chart1, ex:illustrate, -)
    g.wasGeneratedBy("ex:chart1", "ex:illustrate")

    #    wasGeneratedBy(ex:chart1, ex:compile,  2012-03-02T10:30:00)
    g.wasGeneratedBy("ex:chart1", "ex:compile", "2012-03-02T10:30:00")
    #    wasGeneratedBy(ex:chart2, ex:compile2, 2012-04-01T15:21:00)
    g.wasGeneratedBy("ex:chart2", "ex:compile2", "2012-04-01T15:21:00")
    #
    #
    #    agent(ex:derek, [ prov:type="prov:Person", foaf:givenName = "Derek",
    #           foaf:mbox= "<mailto:derek@example.org>"])
    g.agent(
        "ex:derek",
        {
            "prov:type": PROV["Person"],
            "foaf:givenName": "Derek",
            "foaf:mbox": "<mailto:derek@example.org>",
        },
    )
    #    wasAssociatedWith(ex:compose, ex:derek, -)
    g.wasAssociatedWith("ex:compose", "ex:derek")
    #    wasAssociatedWith(ex:illustrate, ex:derek, -)
    g.wasAssociatedWith("ex:illustrate", "ex:derek")
    #
    #    agent(ex:chartgen, [ prov:type="prov:Organization",
    #           foaf:name = "Chart Generators Inc"])
    g.agent(
        "ex:chartgen",
        {
            "prov:type": PROV["Organization"],
            "foaf:name": "Chart Generators Inc"
        },
    )
    #    actedOnBehalfOf(ex:derek, ex:chartgen, ex:compose)
    g.actedOnBehalfOf("ex:derek", "ex:chartgen", "ex:compose")
    #    wasAttributedTo(ex:chart1, ex:derek)
    g.wasAttributedTo("ex:chart1", "ex:derek")

    #    wasGeneratedBy(ex:dataSet2, ex:correct, -)
    g.wasGeneratedBy("ex:dataSet2", "ex:correct")
    #    used(ex:correct, ex:dataSet1, -)
    g.used("ex:correct", "ex:dataSet1")
    #    wasDerivedFrom(ex:dataSet2, ex:dataSet1, [prov:type='prov:Revision'])
    g.wasDerivedFrom("ex:dataSet2",
                     "ex:dataSet1",
                     other_attributes={"prov:type": PROV["Revision"]})
    #    wasDerivedFrom(ex:chart2, ex:dataSet2)
    g.wasDerivedFrom("ex:chart2", "ex:dataSet2")

    #    wasDerivedFrom(ex:blogEntry, ex:article, [prov:type='prov:Quotation'])
    g.wasDerivedFrom("ex:blogEntry",
                     "ex:article",
                     other_attributes={"prov:type": PROV["Quotation"]})
    #    specializationOf(ex:articleV1, ex:article)
    g.specializationOf("ex:articleV1", "ex:article")
    #    wasDerivedFrom(ex:articleV1, ex:dataSet1)
    g.wasDerivedFrom("ex:articleV1", "ex:dataSet1")

    #    specializationOf(ex:articleV2, ex:article)
    g.specializationOf("ex:articleV2", "ex:article")
    #    wasDerivedFrom(ex:articleV2, ex:dataSet2)
    g.wasDerivedFrom("ex:articleV2", "ex:dataSet2")

    #    alternateOf(ex:articleV2, ex:articleV1)
    g.alternateOf("ex:articleV2", "ex:articleV1")

    # endDocument
    return g
Beispiel #12
0
def w3c_publication_2():
    """Build the "w3c-publication2" example as a ``ProvDocument``.

    Source of the example:
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn

    PROV-N statements being reproduced::

        bundle

        prefix ex <http://example.org/>
        prefix rec <http://example.org/record>

        prefix w3 <http://www.w3.org/TR/2011/>
        prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>

        entity(hg:Overview.html, [ prov:type="file in hg" ])
        entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])

        activity(ex:rcp,-,-,[prov:type="copy directory"])

        wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)

        entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])

        used(rec:u; ex:rcp,hg:Overview.html,-)
        used(ex:rcp, ex:req3, -)

        wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)

        agent(ex:webmaster, [ prov:type='prov:Person' ])

        wasAssociatedWith(ex:rcp, ex:webmaster, -)

        endBundle
    """
    example_ns = Namespace('ex', 'http://example.org/')
    record_ns = Namespace('rec', 'http://example.org/record')
    w3_ns = Namespace('w3', 'http://www.w3.org/TR/2011/')
    hg_ns = Namespace('hg', 'http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/')

    # Prefixed names that are referred to more than once below.
    published = 'w3:WD-prov-dm-20111215'
    source_file = 'hg:Overview.html'
    copy_activity = 'ex:rcp'
    request = 'ex:req3'
    webmaster = 'ex:webmaster'

    document = ProvDocument()

    # The two document versions: the repository file and the published one.
    document.entity(hg_ns['Overview.html'], {'prov:type': "file in hg"})
    document.entity(w3_ns['WD-prov-dm-20111215'], {'prov:type': "html4"})

    # The copy activity has no start or end time.
    document.activity(example_ns['rcp'], None, None, {'prov:type': "copy directory"})

    # Generation record, identified as rec:g so the derivation can cite it.
    document.wasGeneratedBy(published, copy_activity, identifier=record_ns['g'])

    request_type = Identifier("http://www.w3.org/2005/08/01-transitions.html#pubreq")
    document.entity(request, {'prov:type': request_type})

    # Usage record, identified as rec:u so the derivation can cite it.
    document.used(copy_activity, source_file, identifier='rec:u')
    document.used(copy_activity, request)

    document.wasDerivedFrom(published, source_file, copy_activity, 'rec:g', 'rec:u')

    document.agent(webmaster, {'prov:type': "Person"})

    document.wasAssociatedWith(copy_activity, webmaster)

    return document
Beispiel #13
0
def primer_example():
    """Build the PROV primer example as a ``ProvDocument``.

    Each call below is preceded by the PROV-N statement it reproduces from
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/primer.pn

    :return: the populated ProvDocument
    """
    #===========================================================================
    # document
    g = ProvDocument()

    #    prefix ex <http://example/>
    #    prefix dcterms <http://purl.org/dc/terms/>
    #    prefix foaf <http://xmlns.com/foaf/0.1/>
    ex = Namespace('ex', 'http://example/')  # namespaces do not need to be explicitly added to a document
    g.add_namespace("dcterms", "http://purl.org/dc/terms/")
    g.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    #    entity(ex:article, [dcterms:title="Crime rises in cities"])
    # first time the ex namespace was used, it is added to the document automatically
    g.entity(ex['article'], {'dcterms:title': "Crime rises in cities"})
    #    entity(ex:articleV1)
    #    entity(ex:articleV2)
    #    entity(ex:dataSet1)
    #    entity(ex:dataSet2)
    #    entity(ex:regionList)
    #    entity(ex:composition)
    #    entity(ex:chart1)
    #    entity(ex:chart2)
    #    entity(ex:blogEntry)
    for entity_name in ('articleV1', 'articleV2', 'dataSet1', 'dataSet2', 'regionList',
                        'composition', 'chart1', 'chart2', 'blogEntry'):
        g.entity(ex[entity_name])

    #    activity(ex:compile)
    #    activity(ex:compile2)
    #    activity(ex:compose)
    # since ex is registered, it can be used like this
    for activity_name in ('ex:compile', 'ex:compile2', 'ex:compose'):
        g.activity(activity_name)
    #    activity(ex:correct, 2012-03-31T09:21:00, 2012-04-01T15:21:00)
    g.activity('ex:correct', '2012-03-31T09:21:00', '2012-04-01T15:21:00')  # date time can be provided as strings
    #    activity(ex:illustrate)
    g.activity('ex:illustrate')

    #    used(ex:compose, ex:dataSet1, -,   [ prov:role = "ex:dataToCompose"])
    g.used('ex:compose', 'ex:dataSet1', other_attributes={'prov:role': "ex:dataToCompose"})
    #    used(ex:compose, ex:regionList, -, [ prov:role = "ex:regionsToAggregateBy"])
    g.used('ex:compose', 'ex:regionList', other_attributes={'prov:role': "ex:regionsToAggregateBy"})
    #    wasGeneratedBy(ex:composition, ex:compose, -)
    g.wasGeneratedBy('ex:composition', 'ex:compose')

    #    used(ex:illustrate, ex:composition, -)
    g.used('ex:illustrate', 'ex:composition')
    #    wasGeneratedBy(ex:chart1, ex:illustrate, -)
    g.wasGeneratedBy('ex:chart1', 'ex:illustrate')

    #    wasGeneratedBy(ex:chart1, ex:compile,  2012-03-02T10:30:00)
    g.wasGeneratedBy('ex:chart1', 'ex:compile', '2012-03-02T10:30:00')
    #    wasGeneratedBy(ex:chart2, ex:compile2, 2012-04-01T15:21:00)
    g.wasGeneratedBy('ex:chart2', 'ex:compile2', '2012-04-01T15:21:00')
    #
    #
    #    agent(ex:derek, [ prov:type="prov:Person", foaf:givenName = "Derek",
    #           foaf:mbox= "<mailto:derek@example.org>"])
    g.agent('ex:derek', {
        'prov:type': PROV["Person"], 'foaf:givenName': "Derek", 'foaf:mbox': "<mailto:derek@example.org>"
    })
    #    wasAssociatedWith(ex:compose, ex:derek, -)
    g.wasAssociatedWith('ex:compose', 'ex:derek')
    #    wasAssociatedWith(ex:illustrate, ex:derek, -)
    g.wasAssociatedWith('ex:illustrate', 'ex:derek')
    #
    #    agent(ex:chartgen, [ prov:type="prov:Organization",
    #           foaf:name = "Chart Generators Inc"])
    g.agent('ex:chartgen', {'prov:type': PROV["Organization"], 'foaf:name': "Chart Generators Inc"})
    #    actedOnBehalfOf(ex:derek, ex:chartgen, ex:compose)
    g.actedOnBehalfOf('ex:derek', 'ex:chartgen', 'ex:compose')
    #    wasAttributedTo(ex:chart1, ex:derek)
    g.wasAttributedTo('ex:chart1', 'ex:derek')

    #    wasGeneratedBy(ex:dataSet2, ex:correct, -)
    g.wasGeneratedBy('ex:dataSet2', 'ex:correct')
    #    used(ex:correct, ex:dataSet1, -)
    g.used('ex:correct', 'ex:dataSet1')
    #    wasDerivedFrom(ex:dataSet2, ex:dataSet1, [prov:type='prov:Revision'])
    g.wasDerivedFrom('ex:dataSet2', 'ex:dataSet1', other_attributes={'prov:type': PROV['Revision']})
    #    wasDerivedFrom(ex:chart2, ex:dataSet2)
    g.wasDerivedFrom('ex:chart2', 'ex:dataSet2')

    #    wasDerivedFrom(ex:blogEntry, ex:article, [prov:type='prov:Quotation'])
    g.wasDerivedFrom('ex:blogEntry', 'ex:article', other_attributes={'prov:type': PROV['Quotation']})
    #    specializationOf(ex:articleV1, ex:article)
    g.specializationOf('ex:articleV1', 'ex:article')
    #    wasDerivedFrom(ex:articleV1, ex:dataSet1)
    g.wasDerivedFrom('ex:articleV1', 'ex:dataSet1')

    #    specializationOf(ex:articleV2, ex:article)
    g.specializationOf('ex:articleV2', 'ex:article')
    #    wasDerivedFrom(ex:articleV2, ex:dataSet2)
    g.wasDerivedFrom('ex:articleV2', 'ex:dataSet2')

    #    alternateOf(ex:articleV2, ex:articleV1)
    g.alternateOf('ex:articleV2', 'ex:articleV1')

    # endDocument
    return g
Beispiel #14
0
def ctfToProv():
    """Build a PROV document from ``trace_collection``, grouped by producer.

    ``trace_collection``, ``babeltrace`` and ``entityActivity`` are read from
    the enclosing module. First pass: the N-th event (counting from 0)
    becomes an entity ``ex:event<N>`` carrying its EVENT_FIELDS values plus
    ``ex:name``; the entity and the event's activity value are stored per
    ``producer_id``, and an agent is declared for ``controller_id``.

    Second pass, per producer: a producer agent is declared and linked to the
    controller agent of the *last* event seen; consecutive entities of the
    producer are chained; every entity is recorded as generated by its
    activity, and that activity is associated with the producer agent.

    :return: the populated ProvDocument
    """
    d1 = ProvDocument()
    # Scratch document: only used to render a relation as a string so that
    # already-emitted relations can be recognised; it is never returned.
    dummy = ProvDocument()
    ex = Namespace('ex', 'http://example/')  # namespaces do not need to be explicitly added to a document
    relationships = set()
    can_events = defaultdict(list)
    # enumerate gives each event its own number; a counter that is never
    # advanced would name every entity ex:event0.
    for counter, event in enumerate(trace_collection.events):
        dataset = {'ex:' + k: event[k] for k in event.field_list_with_scope(
            babeltrace.CTFScope.EVENT_FIELDS)}
        dataset['ex:name'] = event.name

        e1 = d1.entity(ex['event' + str(counter)], dataset)

        # Keep the entity together with its activity value for the second pass.
        entity_activity = entityActivity()
        entity_activity.addEntityActivity(e1, event['activity'])
        can_events[event['producer_id']].append(entity_activity)

        controller_agent = d1.agent('ex:' + event['controller_id'])

    for key, entityActivityList in can_events.items():
        producer_agent = d1.agent('ex:' + str(key))

        # NOTE(review): controller_agent is whatever the last event set it
        # to; confirm a single controller for all producers is intended.
        used_relationship = str(dummy.used(controller_agent, producer_agent))
        if used_relationship not in relationships:
            d1.used(controller_agent, producer_agent)
            relationships.add(used_relationship)

        last_index = len(entityActivityList) - 1
        for index, item in enumerate(entityActivityList):
            # Chain each entity to its successor; the last one has none.
            if index < last_index:
                d1.wasAssociatedWith(item.getEntity(), entityActivityList[index + 1].getEntity())
            # Generation and association apply to every event, including the
            # last, and take the entity itself rather than the wrapper.
            # NOTE(review): the activity is the raw event value, without an
            # 'ex:' prefix or a d1.activity() declaration - confirm it
            # resolves to a valid identifier.
            d1.wasGeneratedBy(item.getEntity(), item.getActivity())
            d1.wasAssociatedWith(item.getActivity(), producer_agent)

    return d1
Beispiel #15
0
def w3c_publication_1():
    """Build the "W3C publication 1" example as a ProvDocument.

    Records are added in the same order as the PROV-N listing kept in the
    comment below: namespaces, entities, derivation, activities, generations,
    usages, agent, associations.

    :return: the populated ProvDocument
    """
    # https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication1.prov-asn
    #===========================================================================
    # bundle
    #
    # prefix ex  <http://example.org/>
    #
    # prefix w3      <http://www.w3.org/>
    # prefix tr      <http://www.w3.org/TR/2011/>
    # prefix process <http://www.w3.org/2005/10/Process-20051014/tr.html#>
    # prefix email   <https://lists.w3.org/Archives/Member/w3c-archive/>
    # prefix chairs  <https://lists.w3.org/Archives/Member/chairs/>
    # prefix trans   <http://www.w3.org/2005/08/01-transitions.html#>
    # prefix rec54   <http://www.w3.org/2001/02pd/rec54#>
    #
    #
    #  entity(tr:WD-prov-dm-20111018, [ prov:type='rec54:WD' ])
    #  entity(tr:WD-prov-dm-20111215, [ prov:type='rec54:WD' ])
    #  entity(process:rec-advance,    [ prov:type='prov:Plan' ])
    #
    #
    #  entity(chairs:2011OctDec/0004, [ prov:type='trans:transreq' ])
    #  entity(email:2011Oct/0141,     [ prov:type='trans:pubreq' ])
    #  entity(email:2011Dec/0111,     [ prov:type='trans:pubreq' ])
    #
    #
    #  wasDerivedFrom(tr:WD-prov-dm-20111215, tr:WD-prov-dm-20111018)
    #
    #
    #  activity(ex:act1,-,-,[prov:type="publish"])
    #  activity(ex:act2,-,-,[prov:type="publish"])
    #
    #  wasGeneratedBy(tr:WD-prov-dm-20111018, ex:act1, -)
    #  wasGeneratedBy(tr:WD-prov-dm-20111215, ex:act2, -)
    #
    #  used(ex:act1, chairs:2011OctDec/0004, -)
    #  used(ex:act1, email:2011Oct/0141, -)
    #  used(ex:act2, email:2011Dec/0111, -)
    #
    #  agent(w3:Consortium, [ prov:type='prov:Organization' ])
    #
    #  wasAssociatedWith(ex:act1, w3:Consortium, process:rec-advance)
    #  wasAssociatedWith(ex:act2, w3:Consortium, process:rec-advance)
    #
    # endBundle
    #===========================================================================

    # (prefix, URI) pairs, registered in this order.
    namespaces = (
        ('ex', 'http://example.org/'),
        ('w3', 'http://www.w3.org/'),
        ('tr', 'http://www.w3.org/TR/2011/'),
        ('process', 'http://www.w3.org/2005/10/Process-20051014/tr.html#'),
        ('email', 'https://lists.w3.org/Archives/Member/w3c-archive/'),
        ('chairs', 'https://lists.w3.org/Archives/Member/chairs/'),
        ('trans', 'http://www.w3.org/2005/08/01-transitions.html#'),
        ('rec54', 'http://www.w3.org/2001/02pd/rec54#'),
    )
    # (entity identifier, prov:type value) pairs.
    typed_entities = (
        ('tr:WD-prov-dm-20111018', 'rec54:WD'),
        ('tr:WD-prov-dm-20111215', 'rec54:WD'),
        ('process:rec-advance', 'prov:Plan'),
        ('chairs:2011OctDec/0004', 'trans:transreq'),
        ('email:2011Oct/0141', 'trans:pubreq'),
        ('email:2011Dec/0111', 'trans:pubreq'),
    )
    publish_activities = ('ex:act1', 'ex:act2')

    doc = ProvDocument()
    for prefix, uri in namespaces:
        doc.add_namespace(prefix, uri)

    for entity_id, prov_type in typed_entities:
        doc.entity(entity_id, {'prov:type': prov_type})

    doc.wasDerivedFrom('tr:WD-prov-dm-20111215', 'tr:WD-prov-dm-20111018')

    for activity_id in publish_activities:
        doc.activity(activity_id, other_attributes={'prov:type': "publish"})

    for entity_id, activity_id in (
            ('tr:WD-prov-dm-20111018', 'ex:act1'),
            ('tr:WD-prov-dm-20111215', 'ex:act2')):
        doc.wasGeneratedBy(entity_id, activity_id)

    for activity_id, entity_id in (
            ('ex:act1', 'chairs:2011OctDec/0004'),
            ('ex:act1', 'email:2011Oct/0141'),
            ('ex:act2', 'email:2011Dec/0111')):
        doc.used(activity_id, entity_id)

    # NOTE(review): the PROV-N listing types this agent as
    # 'prov:Organization'; the code passes the plain string "Organization".
    doc.agent('w3:Consortium', other_attributes={'prov:type': "Organization"})

    # The third argument is the plan entity of each association.
    for activity_id in publish_activities:
        doc.wasAssociatedWith(activity_id, 'w3:Consortium',
                              'process:rec-advance')

    return doc
Beispiel #16
0
def w3c_publication_2():
    """Build the "W3C publication 2" example as a ProvDocument.

    Records are added in the same order as the PROV-N listing kept in the
    comment below.

    :return: the populated ProvDocument
    """
    # https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn
    #===========================================================================
    # bundle
    #
    # prefix ex <http://example.org/>
    # prefix rec <http://example.org/record>
    #
    # prefix w3 <http://www.w3.org/TR/2011/>
    # prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>
    #
    #
    # entity(hg:Overview.html, [ prov:type="file in hg" ])
    # entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])
    #
    #
    # activity(ex:rcp,-,-,[prov:type="copy directory"])
    #
    # wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)
    #
    # entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])
    #
    # used(rec:u; ex:rcp,hg:Overview.html,-)
    # used(ex:rcp, ex:req3, -)
    #
    #
    # wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)
    #
    # agent(ex:webmaster, [ prov:type='prov:Person' ])
    #
    # wasAssociatedWith(ex:rcp, ex:webmaster, -)
    #
    # endBundle
    #===========================================================================

    doc = ProvDocument()

    # These namespaces are never passed to doc.add_namespace(); the document
    # only sees them through the identifiers built from them below.
    # NOTE(review): the string forms used later ('ex:rcp', 'rec:u', ...)
    # presumably resolve only because a Namespace-built identifier with the
    # same prefix was handed to the document first -- keep the statement
    # order; confirm against the prov library.
    ns_ex = Namespace('ex', 'http://example.org/')
    ns_rec = Namespace('rec', 'http://example.org/record')
    ns_w3 = Namespace('w3', 'http://www.w3.org/TR/2011/')
    ns_hg = Namespace(
        'hg',
        'http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/'
        'WD-prov-dm-20111215/'
    )

    # Source file and published document.
    overview_id = ns_hg['Overview.html']
    doc.entity(overview_id, {'prov:type': "file in hg"})
    published_id = ns_w3['WD-prov-dm-20111215']
    doc.entity(published_id, {'prov:type': "html4"})

    # The copy activity, with no start or end time.
    copy_id = ns_ex['rcp']
    doc.activity(copy_id, None, None, {'prov:type': "copy directory"})

    # Generation record, identified as rec:g.
    generation_id = ns_rec['g']
    doc.wasGeneratedBy('w3:WD-prov-dm-20111215', 'ex:rcp',
                       identifier=generation_id)

    # Publication request whose type is given as an Identifier, not a string.
    request_type = Identifier(
        "http://www.w3.org/2005/08/01-transitions.html#pubreq")
    doc.entity('ex:req3', {'prov:type': request_type})

    # Usage records; only the first one carries an identifier (rec:u).
    doc.used('ex:rcp', 'hg:Overview.html', identifier='rec:u')
    doc.used('ex:rcp', 'ex:req3')

    # Derivation referencing the activity plus the generation and usage
    # record identifiers.
    doc.wasDerivedFrom(
        'w3:WD-prov-dm-20111215', 'hg:Overview.html', 'ex:rcp',
        'rec:g', 'rec:u')

    doc.agent('ex:webmaster', {'prov:type': "Person"})
    doc.wasAssociatedWith('ex:rcp', 'ex:webmaster')

    return doc
def w3c_publication_1():
    """Return a ProvDocument holding the "W3C publication 1" example.

    The statements follow the PROV-N listing reproduced in the comment below,
    in the same order.

    :return: the populated ProvDocument
    """
    # https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication1.prov-asn
    # ===========================================================================
    # bundle
    #
    # prefix ex  <http://example.org/>
    #
    # prefix w3      <http://www.w3.org/>
    # prefix tr      <http://www.w3.org/TR/2011/>
    # prefix process <http://www.w3.org/2005/10/Process-20051014/tr.html#>
    # prefix email   <https://lists.w3.org/Archives/Member/w3c-archive/>
    # prefix chairs  <https://lists.w3.org/Archives/Member/chairs/>
    # prefix trans   <http://www.w3.org/2005/08/01-transitions.html#>
    # prefix rec54   <http://www.w3.org/2001/02pd/rec54#>
    #
    #
    #  entity(tr:WD-prov-dm-20111018, [ prov:type='rec54:WD' ])
    #  entity(tr:WD-prov-dm-20111215, [ prov:type='rec54:WD' ])
    #  entity(process:rec-advance,    [ prov:type='prov:Plan' ])
    #
    #
    #  entity(chairs:2011OctDec/0004, [ prov:type='trans:transreq' ])
    #  entity(email:2011Oct/0141,     [ prov:type='trans:pubreq' ])
    #  entity(email:2011Dec/0111,     [ prov:type='trans:pubreq' ])
    #
    #
    #  wasDerivedFrom(tr:WD-prov-dm-20111215, tr:WD-prov-dm-20111018)
    #
    #
    #  activity(ex:act1,-,-,[prov:type="publish"])
    #  activity(ex:act2,-,-,[prov:type="publish"])
    #
    #  wasGeneratedBy(tr:WD-prov-dm-20111018, ex:act1, -)
    #  wasGeneratedBy(tr:WD-prov-dm-20111215, ex:act2, -)
    #
    #  used(ex:act1, chairs:2011OctDec/0004, -)
    #  used(ex:act1, email:2011Oct/0141, -)
    #  used(ex:act2, email:2011Dec/0111, -)
    #
    #  agent(w3:Consortium, [ prov:type='prov:Organization' ])
    #
    #  wasAssociatedWith(ex:act1, w3:Consortium, process:rec-advance)
    #  wasAssociatedWith(ex:act2, w3:Consortium, process:rec-advance)
    #
    # endBundle
    # ===========================================================================

    prefix_uris = [
        ("ex", "http://example.org/"),
        ("w3", "http://www.w3.org/"),
        ("tr", "http://www.w3.org/TR/2011/"),
        ("process", "http://www.w3.org/2005/10/Process-20051014/tr.html#"),
        ("email", "https://lists.w3.org/Archives/Member/w3c-archive/"),
        ("chairs", "https://lists.w3.org/Archives/Member/chairs/"),
        ("trans", "http://www.w3.org/2005/08/01-transitions.html#"),
        ("rec54", "http://www.w3.org/2001/02pd/rec54#"),
    ]
    entity_types = [
        ("tr:WD-prov-dm-20111018", "rec54:WD"),
        ("tr:WD-prov-dm-20111215", "rec54:WD"),
        ("process:rec-advance", "prov:Plan"),
        ("chairs:2011OctDec/0004", "trans:transreq"),
        ("email:2011Oct/0141", "trans:pubreq"),
        ("email:2011Dec/0111", "trans:pubreq"),
    ]
    generated_by = [
        ("tr:WD-prov-dm-20111018", "ex:act1"),
        ("tr:WD-prov-dm-20111215", "ex:act2"),
    ]
    usages = [
        ("ex:act1", "chairs:2011OctDec/0004"),
        ("ex:act1", "email:2011Oct/0141"),
        ("ex:act2", "email:2011Dec/0111"),
    ]
    activity_ids = ["ex:act1", "ex:act2"]

    document = ProvDocument()

    for prefix, uri in prefix_uris:
        document.add_namespace(prefix, uri)

    for identifier, type_value in entity_types:
        document.entity(identifier, {"prov:type": type_value})

    document.wasDerivedFrom("tr:WD-prov-dm-20111215",
                            "tr:WD-prov-dm-20111018")

    for identifier in activity_ids:
        document.activity(identifier,
                          other_attributes={"prov:type": "publish"})

    for entity_id, activity_id in generated_by:
        document.wasGeneratedBy(entity_id, activity_id)

    for activity_id, entity_id in usages:
        document.used(activity_id, entity_id)

    # NOTE(review): the PROV-N listing types this agent as
    # 'prov:Organization'; the code passes the plain string "Organization".
    document.agent("w3:Consortium",
                   other_attributes={"prov:type": "Organization"})

    # Both activities are associated with the same agent and plan.
    for identifier in activity_ids:
        document.wasAssociatedWith(identifier, "w3:Consortium",
                                   "process:rec-advance")

    return document
Beispiel #18
0
class LogProv:
    """Provenance document built from a single parsed log record.

    The constructor fills a fresh ProvDocument with up to two agents
    (``vre:ag1``, ``vre:ag2``), one entity (``vre:en1``) and one activity
    (``vre:ac1``), plus the relations between them. The document is exposed
    through the ``prov_doc`` property.
    """

    def __init__(self, log_dic):
        """Populate the document from ``log_dic``.

        :param log_dic: mapping of log fields. Keys always read:
            ``remote_host``, ``status``, ``response_bytes_clf``,
            ``request_url``, ``request_method``, ``request_http_ver`` and
            ``time_received_tz_isoformat``. ``request_url_username`` and
            ``request_url_hostname`` are used only when present and truthy.
        """
        doc = ProvDocument()
        self._prov_doc = doc
        vre = doc.add_namespace('vre', 'https://www.vre4eic.eu/log#')
        prov = doc.add_namespace('prov', 'http://www.w3.org/ns/prov#')

        # Agent ag1: the remote host; the username attribute is added only
        # when the log record carries a non-empty one.
        client_attributes = {
            prov['type']: PROV["SoftwareAgent"],
            vre['hasIP']: log_dic['remote_host'],
        }
        if log_dic.get('request_url_username'):
            client_attributes[vre['hasUsername']] = (
                log_dic['request_url_username'])
        remote_host = doc.agent(vre['ag1'], client_attributes)

        # Agent ag2 is declared only when a request hostname is present; the
        # returned record is not used afterwards.
        # NOTE(review): hasIP is filled from 'remote_host' here as well, not
        # from 'request_url_hostname' -- confirm this is intended.
        if log_dic.get('request_url_hostname'):
            doc.agent(
                vre['ag2'], {
                    prov['type']: PROV["SoftwareAgent"],
                    vre['hasIP']: log_dic['remote_host'],
                })

        response_attributes = {
            vre['status']: log_dic['status'],
            vre['responseBytes']: log_dic['response_bytes_clf'],
        }
        request_entity = doc.entity(vre['en1'], response_attributes)

        request_attributes = {
            vre['requestURL']: log_dic['request_url'],
            vre['requestMethod']: log_dic['request_method'],
            vre['httpVersion']: log_dic['request_http_ver'],
        }
        received_activity = doc.activity(
            vre['ac1'], other_attributes=request_attributes)

        # NOTE(review): the generation record is given the ag1 agent (not
        # en1) as its first argument, and the attribution links en1 to the
        # activity rather than to an agent -- confirm both are intended.
        doc.generation(remote_host,
                       activity=received_activity,
                       time=log_dic['time_received_tz_isoformat'])
        doc.wasAttributedTo(request_entity, received_activity)
        doc.wasAssociatedWith(received_activity, remote_host)

    @property
    def prov_doc(self):
        """The ProvDocument held by this instance."""
        return self._prov_doc

    @prov_doc.setter
    def prov_doc(self, value):
        # Plain replacement of the held document; no validation is done.
        self._prov_doc = value

    @prov_doc.deleter
    def prov_doc(self):
        # Removes the attribute; reading prov_doc afterwards raises
        # AttributeError.
        del self._prov_doc
Beispiel #19
0
class NIDMExporter():

    """
    Generic class to parse a result directory to extract the pieces of
    information to be stored in NIDM-Results and to generate a NIDM-Results
    export.
    """

    def __init__(self, version, out_dir, zipped=True):
        out_dirname = os.path.basename(out_dir)
        out_path = os.path.dirname(out_dir)

        # Create output path from output name
        self.zipped = zipped
        if not self.zipped:
            out_dirname = out_dirname+".nidm"
        else:
            out_dirname = out_dirname+".nidm.zip"
        out_dir = os.path.join(out_path, out_dirname)

        # Quit if output path already exists and user doesn't want to overwrite
        # it
        if os.path.exists(out_dir):
            msg = out_dir+" already exists, overwrite?"
            if not input("%s (y/N) " % msg).lower() == 'y':
                quit("Bye.")
            if os.path.isdir(out_dir):
                shutil.rmtree(out_dir)
            else:
                os.remove(out_dir)
        self.out_dir = out_dir

        if version == "dev":
            self.version = {'major': 10000, 'minor': 0, 'revision': 0,
                            'num': version}
        else:
            major, minor, revision = version.split(".")
            if "-rc" in revision:
                revision, rc = revision.split("-rc")
            else:
                rc = -1
            self.version = {'major': int(major), 'minor': int(minor),
                            'revision': int(revision), 'rc': int(rc),
                            'num': version}

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()

        # A temp directory that will contain the exported data
        self.export_dir = tempfile.mkdtemp(prefix="nidm-", dir=out_path)

        self.prepend_path = ''

    def parse(self):
        """
        Parse a result directory to extract the pieces information to be
        stored in NIDM-Results.
        """

        try:
            # Methods: find_software, find_model_fitting, find_contrasts and
            # find_inferences should be defined in the children classes and
            # return a list of NIDM Objects as specified in the objects module

            # Object of type Software describing the neuroimaging software
            # package used for the analysis
            self.software = self._find_software()

            # List of objects of type ModelFitting describing the
            # model fitting step in NIDM-Results (main activity: Model
            # Parameters Estimation)
            self.model_fittings = self._find_model_fitting()

            # Dictionary of (key, value) pairs where where key is a tuple
            # containing the identifier of a ModelParametersEstimation object
            # and a tuple of identifiers of ParameterEstimateMap objects and
            # value is an object of type Contrast describing the contrast
            # estimation step in NIDM-Results (main activity: Contrast
            # Estimation)
            self.contrasts = self._find_contrasts()

            # Inference activity and entities
            # Dictionary of (key, value) pairs where key is the identifier of a
            # ContrastEstimation object and value is an object of type
            # Inference describing the inference step in NIDM-Results (main
            # activity: Inference)
            self.inferences = self._find_inferences()
        except Exception:
            self.cleanup()
            raise

    def cleanup(self):
        if os.path.isdir(self.export_dir):
            shutil.rmtree(self.export_dir)

    def add_object(self, nidm_object, export_file=True):
        """
        Export a NIDM object and record it in the bundle.

        :param nidm_object: object providing ``export()``, ``prov_type``,
            ``id`` and ``attributes``.
        :param export_file: when false, ``None`` is passed to the object's
            ``export()`` in place of the export directory.

        The object is added to ``self.bundle`` as an activity, entity or
        agent according to its ``prov_type``; an object of any other type is
        exported but not added. ``self.bundle`` is not set in the
        constructor -- presumably ``_create_bundle`` creates it before this
        method is called.
        """
        target_dir = self.export_dir if export_file else None

        # NIDMFile objects receive the path prefix as an extra argument.
        export_args = [self.version, target_dir]
        if isinstance(nidm_object, NIDMFile):
            export_args.append(self.prepend_path)
        nidm_object.export(*export_args)

        # ProvDocument: add object to the bundle. Types are compared lazily,
        # in this order, and only the first match is used.
        for prov_class, factory_name in (
                ('Activity', 'activity'),
                ('Entity', 'entity'),
                ('Agent', 'agent')):
            if nidm_object.prov_type == PROV[prov_class]:
                getattr(self.bundle, factory_name)(
                    nidm_object.id, other_attributes=nidm_object.attributes)
                break

    def export(self):
        """
        Generate a NIDM-Results export.
        """
        try:
            if not os.path.isdir(self.export_dir):
                os.mkdir(self.export_dir)

            # Initialise main bundle
            self._create_bundle(self.version)

            self.add_object(self.software)

            # Add model fitting steps
            if not isinstance(self.model_fittings, list):
                self.model_fittings = list(self.model_fittings.values())

            for model_fitting in self.model_fittings:
                # Design Matrix
                # model_fitting.activity.used(model_fitting.design_matrix)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.design_matrix.id)
                self.add_object(model_fitting.design_matrix)
                # *** Export visualisation of the design matrix
                self.add_object(model_fitting.design_matrix.image)

                if model_fitting.design_matrix.image.file is not None:
                    self.add_object(model_fitting.design_matrix.image.file)

                if model_fitting.design_matrix.hrf_models is not None:
                    # drift model
                    self.add_object(model_fitting.design_matrix.drift_model)

                if self.version['major'] > 1 or \
                        (self.version['major'] == 1 and
                         self.version['minor'] >= 3):
                    # Machine
                    # model_fitting.data.wasAttributedTo(model_fitting.machine)
                    self.bundle.wasAttributedTo(model_fitting.data.id,
                                                model_fitting.machine.id)
                    self.add_object(model_fitting.machine)

                    # Imaged subject or group(s)
                    for sub in model_fitting.subjects:
                        self.add_object(sub)
                        # model_fitting.data.wasAttributedTo(sub)
                        self.bundle.wasAttributedTo(model_fitting.data.id,
                                                    sub.id)

                # Data
                # model_fitting.activity.used(model_fitting.data)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.data.id)
                self.add_object(model_fitting.data)

                # Error Model
                # model_fitting.activity.used(model_fitting.error_model)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.error_model.id)
                self.add_object(model_fitting.error_model)

                # Parameter Estimate Maps
                for param_estimate in model_fitting.param_estimates:
                    # param_estimate.wasGeneratedBy(model_fitting.activity)
                    self.bundle.wasGeneratedBy(param_estimate.id,
                                               model_fitting.activity.id)
                    self.add_object(param_estimate)
                    self.add_object(param_estimate.coord_space)
                    self.add_object(param_estimate.file)

                    if param_estimate.derfrom is not None:
                        self.bundle.wasDerivedFrom(param_estimate.id,
                                                   param_estimate.derfrom.id)
                        self.add_object(param_estimate.derfrom)
                        self.add_object(param_estimate.derfrom.file,
                                        export_file=False)

                # Residual Mean Squares Map
                # model_fitting.rms_map.wasGeneratedBy(model_fitting.activity)
                self.add_object(model_fitting.rms_map)
                self.bundle.wasGeneratedBy(model_fitting.rms_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.rms_map.coord_space)
                self.add_object(model_fitting.rms_map.file)
                if model_fitting.rms_map.derfrom is not None:
                    self.bundle.wasDerivedFrom(
                        model_fitting.rms_map.id,
                        model_fitting.rms_map.derfrom.id)
                    self.add_object(model_fitting.rms_map.derfrom)
                    self.add_object(model_fitting.rms_map.derfrom.file,
                                    export_file=False)

                # Resels per Voxel Map
                if model_fitting.rpv_map is not None:
                    self.add_object(model_fitting.rpv_map)
                    self.bundle.wasGeneratedBy(model_fitting.rpv_map.id,
                                               model_fitting.activity.id)
                    self.add_object(model_fitting.rpv_map.coord_space)
                    self.add_object(model_fitting.rpv_map.file)
                    if model_fitting.rpv_map.inf_id is not None:
                        self.bundle.used(model_fitting.rpv_map.inf_id,
                                         model_fitting.rpv_map.id)
                    if model_fitting.rpv_map.derfrom is not None:
                        self.bundle.wasDerivedFrom(
                            model_fitting.rpv_map.id,
                            model_fitting.rpv_map.derfrom.id)
                        self.add_object(model_fitting.rpv_map.derfrom)
                        self.add_object(model_fitting.rpv_map.derfrom.file,
                                        export_file=False)

                # Mask
                # model_fitting.mask_map.wasGeneratedBy(model_fitting.activity)
                self.bundle.wasGeneratedBy(model_fitting.mask_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.mask_map)
                if model_fitting.mask_map.derfrom is not None:
                    self.bundle.wasDerivedFrom(
                        model_fitting.mask_map.id,
                        model_fitting.mask_map.derfrom.id)
                    self.add_object(model_fitting.mask_map.derfrom)
                    self.add_object(model_fitting.mask_map.derfrom.file,
                                    export_file=False)

                # Create coordinate space export
                self.add_object(model_fitting.mask_map.coord_space)
                # Create "Mask map" entity
                self.add_object(model_fitting.mask_map.file)

                # Grand Mean map
                # model_fitting.grand_mean_map.wasGeneratedBy(model_fitting.activity)
                self.bundle.wasGeneratedBy(model_fitting.grand_mean_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.grand_mean_map)
                # Coordinate space entity
                self.add_object(model_fitting.grand_mean_map.coord_space)
                # Grand Mean Map entity
                self.add_object(model_fitting.grand_mean_map.file)

                # Model Parameters Estimation activity
                self.add_object(model_fitting.activity)
                self.bundle.wasAssociatedWith(model_fitting.activity.id,
                                              self.software.id)
                # model_fitting.activity.wasAssociatedWith(self.software)
                # self.add_object(model_fitting)

            # Add contrast estimation steps
            analysis_masks = dict()
            for (model_fitting_id, pe_ids), contrasts in list(
                    self.contrasts.items()):
                for contrast in contrasts:
                    model_fitting = self._get_model_fitting(model_fitting_id)
                    # for contrast in contrasts:
                    # contrast.estimation.used(model_fitting.rms_map)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.rms_map.id)
                    # contrast.estimation.used(model_fitting.mask_map)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.mask_map.id)
                    analysis_masks[contrast.estimation.id] = \
                        model_fitting.mask_map.id
                    self.bundle.used(contrast.estimation.id,
                                     contrast.weights.id)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.design_matrix.id)
                    # contrast.estimation.wasAssociatedWith(self.software)
                    self.bundle.wasAssociatedWith(contrast.estimation.id,
                                                  self.software.id)

                    for pe_id in pe_ids:
                        # contrast.estimation.used(pe_id)
                        self.bundle.used(contrast.estimation.id, pe_id)

                    # Create estimation activity
                    self.add_object(contrast.estimation)

                    # Create contrast weights
                    self.add_object(contrast.weights)

                    if contrast.contrast_map is not None:
                        # Create contrast Map
                        # contrast.contrast_map.wasGeneratedBy(contrast.estimation)
                        self.bundle.wasGeneratedBy(contrast.contrast_map.id,
                                                   contrast.estimation.id)
                        self.add_object(contrast.contrast_map)
                        self.add_object(contrast.contrast_map.coord_space)
                        # Copy contrast map in export directory
                        self.add_object(contrast.contrast_map.file)

                        if contrast.contrast_map.derfrom is not None:
                            self.bundle.wasDerivedFrom(
                                contrast.contrast_map.id,
                                contrast.contrast_map.derfrom.id)
                            self.add_object(contrast.contrast_map.derfrom)
                            self.add_object(contrast.contrast_map.derfrom.file,
                                            export_file=False)

                    # Create Std Err. Map (T-tests) or Explained Mean Sq. Map
                    # (F-tests)
                    # contrast.stderr_or_expl_mean_sq_map.wasGeneratedBy
                    # (contrast.estimation)
                    stderr_explmeansq_map = (
                        contrast.stderr_or_expl_mean_sq_map)
                    self.bundle.wasGeneratedBy(
                        stderr_explmeansq_map.id,
                        contrast.estimation.id)
                    self.add_object(stderr_explmeansq_map)
                    self.add_object(
                        stderr_explmeansq_map.coord_space)
                    if isinstance(stderr_explmeansq_map,
                                  ContrastStdErrMap) and \
                            stderr_explmeansq_map.contrast_var:
                        self.add_object(
                            stderr_explmeansq_map.contrast_var)
                        if stderr_explmeansq_map.var_coord_space:
                            self.add_object(
                                stderr_explmeansq_map.var_coord_space)
                        if stderr_explmeansq_map.contrast_var.coord_space:
                            self.add_object(
                                stderr_explmeansq_map.contrast_var.coord_space)
                        self.add_object(
                            stderr_explmeansq_map.contrast_var.file,
                            export_file=False)
                        self.bundle.wasDerivedFrom(
                            stderr_explmeansq_map.id,
                            stderr_explmeansq_map.contrast_var.id)
                    self.add_object(stderr_explmeansq_map.file)

                    # Create Statistic Map
                    # contrast.stat_map.wasGeneratedBy(contrast.estimation)
                    self.bundle.wasGeneratedBy(contrast.stat_map.id,
                                               contrast.estimation.id)
                    self.add_object(contrast.stat_map)
                    self.add_object(contrast.stat_map.coord_space)
                    # Copy Statistical map in export directory
                    self.add_object(contrast.stat_map.file)

                    if contrast.stat_map.derfrom is not None:
                        self.bundle.wasDerivedFrom(
                            contrast.stat_map.id,
                            contrast.stat_map.derfrom.id)
                        self.add_object(contrast.stat_map.derfrom)
                        self.add_object(contrast.stat_map.derfrom.file,
                                        export_file=False)

                    # Create Z Statistic Map
                    if contrast.z_stat_map:
                        # contrast.z_stat_map.wasGeneratedBy(contrast.estimation)
                        self.bundle.wasGeneratedBy(contrast.z_stat_map.id,
                                                   contrast.estimation.id)
                        self.add_object(contrast.z_stat_map)
                        self.add_object(contrast.z_stat_map.coord_space)
                        # Copy Statistical map in export directory
                        self.add_object(contrast.z_stat_map.file)

                    # self.add_object(contrast)

            # Add inference steps
            for contrast_id, inferences in list(self.inferences.items()):
                contrast = self._get_contrast(contrast_id)

                for inference in inferences:
                    if contrast.z_stat_map:
                        used_id = contrast.z_stat_map.id
                    else:
                        used_id = contrast.stat_map.id
                    # inference.inference_act.used(used_id)
                    self.bundle.used(inference.inference_act.id, used_id)
                    # inference.inference_act.wasAssociatedWith(self.software)
                    self.bundle.wasAssociatedWith(inference.inference_act.id,
                                                  self.software.id)

                    # self.add_object(inference)
                    # Excursion set
                    # inference.excursion_set.wasGeneratedBy(inference.inference_act)
                    self.bundle.wasGeneratedBy(inference.excursion_set.id,
                                               inference.inference_act.id)
                    self.add_object(inference.excursion_set)
                    self.add_object(inference.excursion_set.coord_space)
                    if inference.excursion_set.visu is not None:
                        self.add_object(inference.excursion_set.visu)
                        if inference.excursion_set.visu.file is not None:
                            self.add_object(inference.excursion_set.visu.file)
                    # Copy "Excursion set map" file in export directory
                    self.add_object(inference.excursion_set.file)
                    if inference.excursion_set.clust_map is not None:
                        self.add_object(inference.excursion_set.clust_map)
                        self.add_object(inference.excursion_set.clust_map.file)
                        self.add_object(
                            inference.excursion_set.clust_map.coord_space)

                    if inference.excursion_set.mip is not None:
                        self.add_object(inference.excursion_set.mip)
                        self.add_object(inference.excursion_set.mip.file)

                    # Height threshold
                    if inference.height_thresh.equiv_thresh is not None:
                        for equiv in inference.height_thresh.equiv_thresh:
                            self.add_object(equiv)
                    self.add_object(inference.height_thresh)

                    # Extent threshold
                    if inference.extent_thresh.equiv_thresh is not None:
                        for equiv in inference.extent_thresh.equiv_thresh:
                            self.add_object(equiv)
                    self.add_object(inference.extent_thresh)

                    # Display Mask (potentially more than 1)
                    if inference.disp_mask:
                        for mask in inference.disp_mask:
                            # inference.inference_act.used(mask)
                            self.bundle.used(inference.inference_act.id,
                                             mask.id)
                            self.add_object(mask)
                            # Create coordinate space entity
                            self.add_object(mask.coord_space)
                            # Create "Display Mask Map" entity
                            self.add_object(mask.file)

                            if mask.derfrom is not None:
                                self.bundle.wasDerivedFrom(mask.id,
                                                           mask.derfrom.id)
                                self.add_object(mask.derfrom)
                                self.add_object(mask.derfrom.file,
                                                export_file=False)

                    # Search Space
                    self.bundle.wasGeneratedBy(inference.search_space.id,
                                               inference.inference_act.id)
                    # inference.search_space.wasGeneratedBy(inference.inference_act)
                    self.add_object(inference.search_space)
                    self.add_object(inference.search_space.coord_space)
                    # Copy "Mask map" in export directory
                    self.add_object(inference.search_space.file)

                    # Peak Definition
                    if inference.peak_criteria:
                        # inference.inference_act.used(inference.peak_criteria)
                        self.bundle.used(inference.inference_act.id,
                                         inference.peak_criteria.id)
                        self.add_object(inference.peak_criteria)

                    # Cluster Definition
                    if inference.cluster_criteria:
                        # inference.inference_act.used(inference.cluster_criteria)
                        self.bundle.used(inference.inference_act.id,
                                         inference.cluster_criteria.id)
                        self.add_object(inference.cluster_criteria)

                    if inference.clusters:
                        # Clusters and peaks
                        for cluster in inference.clusters:
                            # cluster.wasDerivedFrom(inference.excursion_set)
                            self.bundle.wasDerivedFrom(
                                cluster.id, inference.excursion_set.id)
                            self.add_object(cluster)
                            for peak in cluster.peaks:
                                self.bundle.wasDerivedFrom(peak.id, cluster.id)
                                self.add_object(peak)
                                self.add_object(peak.coordinate)

                            if cluster.cog is not None:
                                self.bundle.wasDerivedFrom(cluster.cog.id,
                                                           cluster.id)
                                self.add_object(cluster.cog)
                                self.add_object(cluster.cog.coordinate)

                    # Inference activity
                    # inference.inference_act.wasAssociatedWith(inference.software_id)
                    # inference.inference_act.used(inference.height_thresh)
                    self.bundle.used(inference.inference_act.id,
                                     inference.height_thresh.id)
                    # inference.inference_act.used(inference.extent_thresh)
                    self.bundle.used(inference.inference_act.id,
                                     inference.extent_thresh.id)
                    self.bundle.used(inference.inference_act.id,
                                     analysis_masks[contrast.estimation.id])
                    self.add_object(inference.inference_act)

            # Write-out prov file
            self.save_prov_to_files()

            return self.out_dir
        except Exception:
            self.cleanup()
            raise

    def _get_model_fitting(self, mf_id):
        """
        Return the model fitting object whose activity identifier equals
        'mf_id', searching self.model_fittings in order.

        Raise an Exception if no stored model fitting matches.
        """
        found = next(
            (candidate for candidate in self.model_fittings
             if candidate.activity.id == mf_id),
            None)
        if found is not None:
            return found

        raise Exception("Model fitting activity with id: " + str(mf_id) +
                        " not found.")

    def _get_contrast(self, con_id):
        """
        Return the contrast whose estimation identifier equals 'con_id'.

        self.contrasts maps keys to collections of contrasts; each collection
        is scanned in turn and the first match is returned. Raise an Exception
        when nothing matches.
        """
        matching = (
            candidate
            for group in self.contrasts.values()
            for candidate in group
            if candidate.estimation.id == con_id)
        found = next(matching, None)
        if found is None:
            raise Exception("Contrast activity with id: " + str(con_id) +
                            " not found.")
        return found

    def _add_namespaces(self):
        """
        Register on self.doc each namespace constant used by the NIDM
        document, in a fixed order.
        """
        namespaces = (NIDM, NIIRI, CRYPTO, DCT, DC, NFO, OBO, SCR, NIF)
        for namespace in namespaces:
            self.doc.add_namespace(namespace)

    def _create_bundle(self, version):
        """
        Initialise the NIDM-Results bundle and describe it in self.doc.

        'version' is a mapping whose 'num' entry holds the NIDM-Results
        version number. For versions other than "1.0.0" and "1.1.0" the
        export activity and the exporter agent are recorded as well, and the
        generation of the bundle is linked to the export activity.

        Attributes already present on self (bundle_ent, export_act,
        export_time, exporter) are reused rather than re-created.
        """
        version_num = version['num']
        # For these versions no export activity / exporter agent is recorded
        without_exporter = version_num in ["1.0.0", "1.1.0"]

        # *** Bundle entity
        if not hasattr(self, 'bundle_ent'):
            self.bundle_ent = NIDMResultsBundle(nidm_version=version_num)
        bundle_ent = self.bundle_ent

        self.bundle = ProvBundle(identifier=bundle_ent.id)
        bundle_ent.export(self.version, self.export_dir)
        self.doc.entity(bundle_ent.id,
                        other_attributes=bundle_ent.attributes)

        # *** NIDM-Results Export Activity
        if not without_exporter:
            if not hasattr(self, 'export_act'):
                self.export_act = NIDMResultsExport()
            export_act = self.export_act
            export_act.export(self.version, self.export_dir)
            self.doc.activity(export_act.id,
                              other_attributes=export_act.attributes)

        # *** Bundle wasGeneratedBy (the export activity, when there is one)
        if not hasattr(self, 'export_time'):
            self.export_time = str(datetime.datetime.now().time())

        generation = {'entity': self.bundle_ent.id, 'time': self.export_time}
        if not without_exporter:
            generation['activity'] = self.export_act.id
        self.doc.wasGeneratedBy(**generation)

        if without_exporter:
            return

        # *** NIDM-Results Exporter (Software Agent)
        if not hasattr(self, 'exporter'):
            self.exporter = self._get_exporter()
        exporter = self.exporter
        exporter.export(self.version, self.export_dir)
        self.doc.agent(exporter.id, other_attributes=exporter.attributes)

        self.doc.wasAssociatedWith(self.export_act.id, exporter.id)

    def _get_model_parameters_estimations(self, error_model):
        """
        Build a ModelParametersEstimation whose estimation method is chosen
        from 'error_model':

        - error dependence is not NIDM_INDEPEDENT_ERROR -> STATO_GLS
        - independent error with error_model.variance_homo true -> STATO_OLS
        - independent error otherwise -> STATO_WLS

        The estimation is created with the identifier of self.software.
        """
        independent = error_model.dependance == NIDM_INDEPEDENT_ERROR
        if not independent:
            method = STATO_GLS
        elif error_model.variance_homo:
            method = STATO_OLS
        else:
            method = STATO_WLS

        return ModelParametersEstimation(method, self.software.id)

    def use_prefixes(self, ttl):
        """
        Declare in 'ttl' the prefixes from prefixes.csv that the text uses.

        prefixes.csv is read from the directory of this module. After a header
        row, each row holds an alphanumeric identifier, a prefix and a URI.
        For every row whose identifier or URI occurs in 'ttl', an "@prefix"
        line is prepended and occurrences of the identifier are replaced by
        "<prefix>:".

        Return a tuple (updated text, dict mapping each declared prefix to
        its URI).
        """
        csv_path = os.path.join(os.path.dirname(__file__), 'prefixes.csv')
        context = {}
        with open(csv_path, encoding="ascii") as csv_fid:
            rows = csv.reader(csv_fid)
            next(rows, None)  # skip the headers
            for alphanum_id, prefix, uri in rows:
                id_found = alphanum_id in ttl
                if not id_found and uri not in ttl:
                    # This row is not used by the document
                    continue

                qualified = prefix + ":"
                context[prefix] = uri
                ttl = "@prefix " + prefix + ": <" + uri + "> .\n" + ttl
                ttl = ttl.replace(alphanum_id, qualified)
                # Second pass kept from the original logic; it only has an
                # effect if the first replacement re-created the identifier.
                if id_found and uri in ttl:
                    ttl = ttl.replace(alphanum_id, qualified)
        return (ttl, context)

    def save_prov_to_files(self, showattributes=False):
        """
        Serialise the provenance document and finalise the export.

        The bundle is attached to self.doc, which is then written to
        'nidm.ttl' (Turtle) and 'nidm.json' (JSON-LD) inside self.export_dir.
        Afterwards the export directory is either renamed to self.out_dir
        (self.zipped is false), or archived into the zip file self.out_dir
        and removed (self.zipped is true).

        Paths are used as given and the current working directory is left
        untouched. Files in sub-directories of the export directory are
        archived under their path relative to that directory.

        'showattributes' is not used by this method.
        """
        self.doc.add_bundle(self.bundle)

        ttl_file = os.path.join(self.export_dir, 'nidm.ttl')
        ttl_txt = self.doc.serialize(format='rdf', rdf_format='turtle')
        ttl_txt, json_context = self.use_prefixes(ttl_txt)

        # Add namespaces to json-ld context
        for namespace in self.doc._namespaces.get_registered_namespaces():
            json_context[namespace._prefix] = namespace._uri
        for namespace in self.doc._namespaces._default_namespaces.values():
            json_context[namespace._prefix] = namespace._uri
        # NOTE(review): "xsd" is bound to the RDF Schema namespace URI here;
        # the XML Schema namespace is http://www.w3.org/2001/XMLSchema#.
        # Left unchanged because it affects the emitted JSON-LD - confirm
        # whether this is intentional.
        json_context["xsd"] = "http://www.w3.org/2000/01/rdf-schema#"

        # Work-around to issue with INF value in rdflib (reported in
        # https://github.com/RDFLib/rdflib/pull/655)
        ttl_txt = ttl_txt.replace(' inf ', ' "INF"^^xsd:float ')
        # Write as UTF-8 explicitly rather than with the platform default
        with open(ttl_file, 'w', encoding='utf-8') as ttl_fid:
            ttl_fid.write(ttl_txt)

        jsonld_file = os.path.join(self.export_dir, 'nidm.json')
        jsonld_txt = self.doc.serialize(format='rdf', rdf_format='json-ld',
                                        context=json_context)
        with open(jsonld_file, 'w', encoding='utf-8') as jsonld_fid:
            jsonld_fid.write(jsonld_txt)

        # Post-processing
        if not self.zipped:
            # Just rename temp directory to output_path
            os.rename(self.export_dir, self.out_dir)
            return

        # Create a zip file that contains the content of the temp directory.
        # The tree is walked by path (no chdir), and each file is stored
        # under its path relative to the export directory so that nested
        # files are found and keep their sub-directory in the archive.
        zf = zipfile.ZipFile(self.out_dir, mode='w')
        try:
            for root, _dirnames, filenames in os.walk(self.export_dir):
                for filename in filenames:
                    file_path = os.path.join(root, filename)
                    zf.write(
                        file_path,
                        arcname=os.path.relpath(file_path, self.export_dir))
        finally:
            zf.close()
            # The temp directory is removed whether or not archiving worked
            shutil.rmtree(self.export_dir)
Beispiel #20
0
    def useGenDependency(self, aDO, usedList, genList, throughActivity):
        """
        Record that every RO in genList was generated by throughActivity,
        which used every RO in usedList and is associated with agent aDO.

        The same statements are added to a fresh provlet document and to the
        global graph pGlobal. The provlet is attached to each generated RO,
        the upstream / downstream pointers of the ROs are extended, and the
        credit manager is asked to account for the generation.

        An event line is printed naming the last generated and the last used
        RO; when one of the lists is empty the corresponding id is printed as
        None instead of raising NameError.

        Returns the provlet (a ProvDocument).
        """
        aID = throughActivity.id

        def describeDependency(doc):
            # Add entities, activity, agent and their relations to doc.
            usedEntities = [doc.entity(DTns + ro.id) for ro in usedList]
            genEntities = [doc.entity(DTns + ro.id) for ro in genList]

            activity = doc.activity(DTns + aID)
            agent = doc.agent(DTns + str(aDO.id))

            doc.wasAssociatedWith(activity, agent)
            for usedEntity in usedEntities:
                doc.used(activity, usedEntity)

            for genEntity in genEntities:
                doc.wasAttributedTo(genEntity, agent)
                doc.wasGeneratedBy(genEntity, activity)

        # create provlet
        d1 = ProvDocument()  # d1 is now an empty provenance document
        d1.add_namespace("dt", "http://cs.ncl.ac.uk/dtsim/")
        describeDependency(d1)

        # associate this provlet to each generated RO, remembering the last
        # generated id for the event line below
        lastGenId = None
        for genRO in genList:
            genRO.provlet = d1
            lastGenId = genRO.id

        lastUsedId = None
        for uRO in usedList:
            lastUsedId = uRO.id

        # Single-argument call form: valid as a Python 2 print statement and
        # as a Python 3 function call, with the same output.
        print("event {n}: DO {do}: {ro1} <- wgby <- {act} <- used {ro}".format(
            n=currentReuseCount, do=aDO.id, ro1=lastGenId, act=aID,
            ro=lastUsedId
        ))

        for genRO in genList:
            for uRO in usedList:
                # update upstream pointer: genRO depends on uRO through the
                # activity. Original note: FIXME URGENTLY, not designed for
                # many-to-many.
                genRO.upstream.append((uRO, throughActivity))

        for uRO in usedList:
            for genRO in genList:
                # update downstream: genRO is downstream from uRO through
                # the activity
                uRO.downstream.append((genRO, throughActivity))

        # update global graph
        describeDependency(pGlobal)

        # trigger credit recomputation
        # each used RO needs its credit updated for each generated RO through
        # the activity
        aCreditManager.addGenerationCredit(usedList, genList, throughActivity)

        return d1
Beispiel #21
0
class Provenance(object):
    """Collect provenance in a PROV document and write it to output_dir.

    Call start() first to create the document and its agents, then
    add_operator() for each operator, and finally write_json() or
    write_png() to export the result.
    """

    def __init__(self, output_dir):
        """Remember the output directory; start() creates the document."""
        self.doc = None
        self.workflow = None
        self.output_dir = output_dir

    def start(self, workflow=False):
        """Create the document, its namespaces and the software agents.

        When 'workflow' is exactly True, a workflow entity and an
        ':orchestrate' activity associated with housemartin are added too.
        """
        from daops import __version__ as daops_version
        from housemartin import __version__ as housemartin_version

        doc = ProvDocument()
        self.doc = doc

        # Namespaces for the prefixes used below
        doc.set_default_namespace(uri="http://purl.org/roocs/prov#")
        for prefix, namespace_uri in (
            ("prov", "http://www.w3.org/ns/prov#"),
            ("provone", "http://purl.dataone.org/provone/2015/01/15/ontology#"),
            ("dcterms", "http://purl.org/dc/terms/"),
        ):
            doc.add_namespace(prefix, uri=namespace_uri)

        # Organisation agent that housemartin is attributed to
        cds_attributes = {
            "prov:type": "prov:Organization",
            "dcterms:title": "Copernicus Climate Data Store",
        }
        cds_agent = doc.agent(":copernicus_CDS", cds_attributes)

        # Software agents; their source points at the installed release tag
        housemartin_attributes = {
            "prov:type": "prov:SoftwareAgent",
            "dcterms:source": f"https://github.com/cedadev/housemartin/releases/tag/v{housemartin_version}",
        }
        self.sw_housemartin = doc.agent(":housemartin", housemartin_attributes)
        doc.wasAttributedTo(self.sw_housemartin, cds_agent)

        daops_attributes = {
            "prov:type": "prov:SoftwareAgent",
            "dcterms:source": f"https://github.com/roocs/daops/releases/tag/v{daops_version}",
        }
        self.sw_daops = doc.agent(":daops", daops_attributes)

        if workflow is not True:
            return

        # Workflow entity used as the plan of the orchestration activity
        self.workflow = doc.entity(":workflow", {"prov:type": "provone:Workflow"})
        orchestration_times = {
            "prov:startedAtTime": "2020-11-26T09:15:00",
            "prov:endedAtTime": "2020-11-26T09:30:00",
        }
        orchestration = doc.activity(
            ":orchestrate", other_attributes=orchestration_times
        )
        doc.wasAssociatedWith(
            orchestration, agent=self.sw_housemartin, plan=self.workflow
        )

    def add_operator(self, operator, parameters, collection, output):
        """Record one operator run deriving output[0] from collection[0].

        The activity is named after 'operator' and carries the 'time' and
        'apply_fixes' entries of 'parameters'. Input and output entities are
        named after the base names of collection[0] and output[0].
        """
        attributes = {
            ":time": parameters.get("time"),
            ":apply_fixes": parameters.get("apply_fixes"),
        }
        operation = self.doc.activity(f":{operator}", other_attributes=attributes)

        # Input data
        source_name = os.path.basename(collection[0])
        source = self.doc.entity(f":{source_name}")

        # Without a workflow the operator is started by daops, triggered by
        # housemartin; with one, daops is associated using the workflow plan.
        if not self.workflow:
            self.doc.start(
                operation, starter=self.sw_daops, trigger=self.sw_housemartin
            )
        else:
            self.doc.wasAssociatedWith(
                operation, agent=self.sw_daops, plan=self.workflow
            )

        # Generated output file
        result_name = os.path.basename(output[0])
        result = self.doc.entity(f":{result_name}")
        self.doc.wasDerivedFrom(result, source, activity=operation)

    def write_json(self):
        """Serialise the document to provenance.json; return its path."""
        target = os.path.join(self.output_dir, "provenance.json")
        self.doc.serialize(target, format="json")
        return target

    def write_png(self):
        """Render the document to provenance.png; return its path."""
        target = os.path.join(self.output_dir, "provenance.png")
        prov_to_dot(self.doc).write_png(target)
        return target
Beispiel #22
0
              "name": "",
              "email": ""
          })
# "File" entity of the `add` document (prov:type "file"); the attribute value
# is left as an empty string.
add.entity("File",
           other_attributes={
               "prov:type": "file",
               "path_at_addition": ""
           })
# "File Version" entity (prov:type "file_version") with empty old/new paths.
add.entity("File Version",
           other_attributes={
               "prov:type": "file_version",
               "old_path": "",
               "new_path": ""
           })
# Relations between the records of the `add` document, referenced by the
# identifiers used when the records were declared.
add.wasInformedBy("Commit", "Parent Commit")
add.wasAssociatedWith("Commit", "Committer")
add.wasAssociatedWith("Commit", "Author")
# Both the file and its version are generated by the commit ...
add.wasGeneratedBy("File", "Commit")
add.wasGeneratedBy("File Version", "Commit")
# ... and attributed to the author.
add.wasAttributedTo("File", "Author")
add.wasAttributedTo("File Version", "Author")
add.specializationOf("File Version", "File")

mod = ProvDocument()
mod.set_default_namespace("gitlab2prov:")
mod.activity(
    "Commit",
    other_attributes={
        "prov:type": "commit",
        "title": "",
        "message": "",
def w3c_publication_2():
    """Build the "w3c-publication2" example as a ProvDocument.

    Source of the example:
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn

    PROV-N listing the document is modelled on::

        bundle

        prefix ex <http://example.org/>
        prefix rec <http://example.org/record>

        prefix w3 <http://www.w3.org/TR/2011/>
        prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>

        entity(hg:Overview.html, [ prov:type="file in hg" ])
        entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])

        activity(ex:rcp,-,-,[prov:type="copy directory"])

        wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)

        entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])

        used(rec:u; ex:rcp,hg:Overview.html,-)
        used(ex:rcp, ex:req3, -)

        wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)

        agent(ex:webmaster, [ prov:type='prov:Person' ])

        wasAssociatedWith(ex:rcp, ex:webmaster, -)

        endBundle
    """
    ex = Namespace("ex", "http://example.org/")
    rec = Namespace("rec", "http://example.org/record")
    w3 = Namespace("w3", "http://www.w3.org/TR/2011/")
    hg = Namespace(
        "hg",
        "http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/",
    )

    doc = ProvDocument()

    # The two versions of the document and the activity that copied it
    doc.entity(hg["Overview.html"],
               other_attributes={"prov:type": "file in hg"})
    doc.entity(w3["WD-prov-dm-20111215"],
               other_attributes={"prov:type": "html4"})
    doc.activity(ex["rcp"], other_attributes={"prov:type": "copy directory"})

    # Generation record, identified as rec:g
    doc.wasGeneratedBy("w3:WD-prov-dm-20111215", "ex:rcp",
                       identifier=rec["g"])

    # Publication request, typed with an Identifier value
    request_type = Identifier(
        "http://www.w3.org/2005/08/01-transitions.html#pubreq")
    doc.entity("ex:req3", {"prov:type": request_type})

    # Usage records; the first one is identified as rec:u
    doc.used("ex:rcp", "hg:Overview.html", identifier="rec:u")
    doc.used("ex:rcp", "ex:req3")

    # Derivation referring to the activity, generation and usage above
    doc.wasDerivedFrom("w3:WD-prov-dm-20111215", "hg:Overview.html",
                       "ex:rcp", "rec:g", "rec:u")

    doc.agent("ex:webmaster", other_attributes={"prov:type": "Person"})
    doc.wasAssociatedWith("ex:rcp", "ex:webmaster")

    return doc
Beispiel #24
0
def w3c_publication_1():
    """Build the "w3c-publication1" example as a ProvDocument.

    Source of the example:
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication1.prov-asn

    PROV-N listing the document is modelled on::

        bundle

        prefix ex  <http://example.org/>

        prefix w3      <http://www.w3.org/>
        prefix tr      <http://www.w3.org/TR/2011/>
        prefix process <http://www.w3.org/2005/10/Process-20051014/tr.html#>
        prefix email   <https://lists.w3.org/Archives/Member/w3c-archive/>
        prefix chairs  <https://lists.w3.org/Archives/Member/chairs/>
        prefix trans   <http://www.w3.org/2005/08/01-transitions.html#>
        prefix rec54   <http://www.w3.org/2001/02pd/rec54#>

        entity(tr:WD-prov-dm-20111018, [ prov:type='rec54:WD' ])
        entity(tr:WD-prov-dm-20111215, [ prov:type='rec54:WD' ])
        entity(process:rec-advance,    [ prov:type='prov:Plan' ])

        entity(chairs:2011OctDec/0004, [ prov:type='trans:transreq' ])
        entity(email:2011Oct/0141,     [ prov:type='trans:pubreq' ])
        entity(email:2011Dec/0111,     [ prov:type='trans:pubreq' ])

        wasDerivedFrom(tr:WD-prov-dm-20111215, tr:WD-prov-dm-20111018)

        activity(ex:act1,-,-,[prov:type="publish"])
        activity(ex:act2,-,-,[prov:type="publish"])

        wasGeneratedBy(tr:WD-prov-dm-20111018, ex:act1, -)
        wasGeneratedBy(tr:WD-prov-dm-20111215, ex:act2, -)

        used(ex:act1, chairs:2011OctDec/0004, -)
        used(ex:act1, email:2011Oct/0141, -)
        used(ex:act2, email:2011Dec/0111, -)

        agent(w3:Consortium, [ prov:type='prov:Organization' ])

        wasAssociatedWith(ex:act1, w3:Consortium, process:rec-advance)
        wasAssociatedWith(ex:act2, w3:Consortium, process:rec-advance)

        endBundle
    """
    doc = ProvDocument()

    # Prefix declarations, in the order of the listing
    namespaces = (
        ('ex', 'http://example.org/'),
        ('w3', 'http://www.w3.org/'),
        ('tr', 'http://www.w3.org/TR/2011/'),
        ('process', 'http://www.w3.org/2005/10/Process-20051014/tr.html#'),
        ('email', 'https://lists.w3.org/Archives/Member/w3c-archive/'),
        ('chairs', 'https://lists.w3.org/Archives/Member/chairs/'),
        ('trans', 'http://www.w3.org/2005/08/01-transitions.html#'),
        ('rec54', 'http://www.w3.org/2001/02pd/rec54#'),
    )
    for prefix, uri in namespaces:
        doc.add_namespace(prefix, uri)

    # Entities: (identifier, prov:type)
    entities = (
        ('tr:WD-prov-dm-20111018', 'rec54:WD'),
        ('tr:WD-prov-dm-20111215', 'rec54:WD'),
        ('process:rec-advance', 'prov:Plan'),
        ('chairs:2011OctDec/0004', 'trans:transreq'),
        ('email:2011Oct/0141', 'trans:pubreq'),
        ('email:2011Dec/0111', 'trans:pubreq'),
    )
    for identifier, prov_type in entities:
        doc.entity(identifier, {'prov:type': prov_type})

    doc.wasDerivedFrom('tr:WD-prov-dm-20111215', 'tr:WD-prov-dm-20111018')

    # The two publication activities
    activities = ('ex:act1', 'ex:act2')
    for activity in activities:
        doc.activity(activity, other_attributes={'prov:type': "publish"})

    # Generations: (entity, activity)
    for entity, activity in (
            ('tr:WD-prov-dm-20111018', 'ex:act1'),
            ('tr:WD-prov-dm-20111215', 'ex:act2')):
        doc.wasGeneratedBy(entity, activity)

    # Usages: (activity, entity)
    for activity, entity in (
            ('ex:act1', 'chairs:2011OctDec/0004'),
            ('ex:act1', 'email:2011Oct/0141'),
            ('ex:act2', 'email:2011Dec/0111')):
        doc.used(activity, entity)

    doc.agent('w3:Consortium', other_attributes={'prov:type': "Organization"})

    # Both activities are associated with the consortium under the same plan
    for activity in activities:
        doc.wasAssociatedWith(activity, 'w3:Consortium',
                              plan='process:rec-advance')

    return doc
Beispiel #25
0
class ProvenanceProfile:
    """
    Provenance profile.

    Populated as the workflow runs.
    """
    def __init__(
        self,
        research_object: "ResearchObject",
        full_name: str,
        host_provenance: bool,
        user_provenance: bool,
        orcid: str,
        fsaccess: StdFsAccess,
        run_uuid: Optional[uuid.UUID] = None,
    ) -> None:
        """
        Store the profile settings and emit the initial PROV records.

        :param research_object: research object this profile belongs to; its
            ``folder``, ``engine_uuid`` and ``add_to_manifest`` are cached
        :param full_name: full name of the creator (logged when non-empty)
        :param host_provenance: flag consulted when the document is generated
        :param user_provenance: flag consulted when the document is generated
        :param orcid: ORCID of the creator (logged when non-empty)
        :param fsaccess: file system access object kept for later file reads
        :param run_uuid: identifier of this workflow run; a random UUID is
            minted when it is omitted
        """
        # Plain settings handed in by the caller
        self.fsaccess = fsaccess
        self.host_provenance = host_provenance
        self.user_provenance = user_provenance
        self.orcid = orcid
        self.full_name = full_name

        # Shortcuts into the research object
        self.research_object = research_object
        self.folder = research_object.folder
        self.engine_uuid = research_object.engine_uuid  # type: str
        self.add_to_manifest = research_object.add_to_manifest

        self.document = ProvDocument()

        if orcid:
            _logger.debug("[provenance] Creator ORCID: %s", orcid)
        if full_name:
            _logger.debug("[provenance] Creator Full name: %s", full_name)

        if run_uuid:
            self.workflow_run_uuid = run_uuid
        else:
            self.workflow_run_uuid = uuid.uuid4()
        self.workflow_run_uri = self.workflow_run_uuid.urn  # type: str

        # Must come last: it reads the attributes assigned above
        self.generate_prov_doc()

    def __str__(self) -> str:
        """Represent this Provenvance profile as a string."""
        return "ProvenanceProfile <{}> in <{}>".format(
            self.workflow_run_uri,
            self.research_object,
        )

    def generate_prov_doc(self) -> Tuple[str, ProvDocument]:
        """
        Register namespaces and emit the initial records of the PROV document.

        Besides the namespaces this declares the account agent, optionally a
        person agent (when an ORCID or a full name is known), the workflow
        engine agent and the workflow run activity, and relates them to each
        other. As side effects it sets ``self.cwltool_version``,
        ``self.metadata_ns``, ``self.provenance_ns`` and ``self.wf_ns``.

        :return: tuple of the workflow run URI and the populated document
        """
        def host_provenance(document: ProvDocument) -> None:
            """Declare the account agent with the name returned by getfqdn()."""
            document.add_namespace(CWLPROV)
            document.add_namespace(UUID)
            document.add_namespace(FOAF)

            hostname = getfqdn()
            # won't have a foaf:accountServiceHomepage for unix hosts, but
            # we can at least provide hostname
            document.agent(
                ACCOUNT_UUID,
                {
                    PROV_TYPE: FOAF["OnlineAccount"],
                    "prov:location": hostname,
                    CWLPROV["hostname"]: hostname,
                },
            )

        # Only the last whitespace-separated token of versionstring() is kept
        self.cwltool_version = "cwltool %s" % versionstring().split()[-1]
        self.document.add_namespace("wfprov",
                                    "http://purl.org/wf4ever/wfprov#")
        # document.add_namespace('prov', 'http://www.w3.org/ns/prov#')
        self.document.add_namespace("wfdesc",
                                    "http://purl.org/wf4ever/wfdesc#")
        # TODO: Make this ontology. For now only has cwlprov:image
        self.document.add_namespace("cwlprov", "https://w3id.org/cwl/prov#")
        self.document.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")
        self.document.add_namespace("schema", "http://schema.org/")
        self.document.add_namespace("orcid", "https://orcid.org/")
        self.document.add_namespace("id", "urn:uuid:")
        # NOTE: Internet draft expired 2004-03-04 (!)
        #  https://tools.ietf.org/html/draft-thiemann-hash-urn-01
        # TODO: Change to nih:sha-256; hashes
        #  https://tools.ietf.org/html/rfc6920#section-7
        self.document.add_namespace("data", "urn:hash::sha1:")
        # Also needed for docker images
        self.document.add_namespace(SHA256, "nih:sha-256;")

        # info only, won't really be used by prov as sub-resources use /
        self.document.add_namespace("researchobject",
                                    self.research_object.base_uri)
        # annotations
        self.metadata_ns = self.document.add_namespace(
            "metadata", self.research_object.base_uri + METADATA + "/")
        # Pre-register provenance directory so we can refer to its files
        self.provenance_ns = self.document.add_namespace(
            "provenance",
            self.research_object.base_uri + posix_path(PROVENANCE) + "/")
        ro_identifier_workflow = self.research_object.base_uri + "workflow/packed.cwl#"
        # Kept on self: other methods build role identifiers from this namespace
        self.wf_ns = self.document.add_namespace("wf", ro_identifier_workflow)
        ro_identifier_input = (self.research_object.base_uri +
                               "workflow/primary-job.json#")
        self.document.add_namespace("input", ro_identifier_input)

        # More info about the account (e.g. username, fullname)
        # may or may not have been previously logged by user_provenance()
        # .. but we always know cwltool was launched (directly or indirectly)
        # by a user account, as cwltool is a command line tool
        account = self.document.agent(ACCOUNT_UUID)
        if self.orcid or self.full_name:
            # NOTE(review): PROV_TYPE and the literal "prov:type" are distinct
            # dict keys here, presumably so that both types are asserted on
            # the agent -- confirm against the prov library's attribute handling
            person = {PROV_TYPE: PROV["Person"], "prov:type": SCHEMA["Person"]}
            if self.full_name:
                person["prov:label"] = self.full_name
                person["foaf:name"] = self.full_name
                person["schema:name"] = self.full_name
            else:
                # TODO: Look up name from ORCID API?
                pass
            # Without an ORCID the person is identified by a fresh UUID URN
            agent = self.document.agent(self.orcid or uuid.uuid4().urn, person)
            self.document.actedOnBehalfOf(account, agent)
        else:
            # NOTE(review): host/user provenance is only recorded when neither
            # an ORCID nor a full name was supplied -- confirm this is intended
            if self.host_provenance:
                host_provenance(self.document)
            if self.user_provenance:
                self.research_object.user_provenance(self.document)
        # The execution of cwltool
        wfengine = self.document.agent(
            self.engine_uuid,
            {
                PROV_TYPE: PROV["SoftwareAgent"],
                "prov:type": WFPROV["WorkflowEngine"],
                "prov:label": self.cwltool_version,
            },
        )
        # FIXME: This datetime will be a bit too delayed, we should
        # capture when cwltool.py earliest started?
        self.document.wasStartedBy(wfengine, None, account,
                                   datetime.datetime.now())
        # define workflow run level activity
        # (start time is "now"; the end time is left unset here)
        self.document.activity(
            self.workflow_run_uri,
            datetime.datetime.now(),
            None,
            {
                PROV_TYPE: WFPROV["WorkflowRun"],
                "prov:label": "Run of workflow/packed.cwl#main",
            },
        )
        # association between SoftwareAgent and WorkflowRun
        main_workflow = "wf:main"
        self.document.wasAssociatedWith(self.workflow_run_uri,
                                        self.engine_uuid, main_workflow)
        self.document.wasStartedBy(self.workflow_run_uri, None,
                                   self.engine_uuid, datetime.datetime.now())
        return (self.workflow_run_uri, self.document)

    def evaluate(
        self,
        process: Process,
        job: JobsType,
        job_order_object: CWLObjectType,
        research_obj: "ResearchObject",
    ) -> None:
        """
        Record prospective provenance and used inputs for a top-level job.

        A process without ``steps`` is treated as an independent command line
        tool execution; its customised job order is additionally stored in
        the research object. A process with ``steps`` is only handled when
        the job has a ``workflow`` attribute; otherwise nothing is recorded.
        """
        standalone_tool = not hasattr(process, "steps")
        if not (standalone_tool or hasattr(job, "workflow")):
            return

        self.prospective_prov(job)
        customised_job = copy_job_order(job, job_order_object)
        self.used_artefacts(customised_job, self.workflow_run_uri)
        if standalone_tool:
            # only independent tool runs get their job order saved
            research_obj.create_job(customised_job)

    def record_process_start(
            self,
            process: Process,
            job: JobsType,
            process_run_id: Optional[str] = None) -> Optional[str]:
        if not hasattr(process, "steps"):
            process_run_id = self.workflow_run_uri
        elif not hasattr(job, "workflow"):
            # commandline tool execution as part of workflow
            name = ""
            if isinstance(job, (CommandLineJob, JobBase, WorkflowJob)):
                name = job.name
            process_name = urllib.parse.quote(name, safe=":/,#")
            process_run_id = self.start_process(process_name,
                                                datetime.datetime.now())
        return process_run_id

    def start_process(
        self,
        process_name: str,
        when: datetime.datetime,
        process_run_id: Optional[str] = None,
    ) -> str:
        """
        Declare a ProcessRun activity and mark it as started.

        :param process_name: name of the step, appended to ``wf:main/``
        :param when: start time recorded on the wasStartedBy relation
        :param process_run_id: identifier to use for the activity; a fresh
            UUID URN is minted when it is None
        :return: the identifier of the declared activity
        """
        run_id = uuid.uuid4().urn if process_run_id is None else process_run_id

        run_attributes = {
            PROV_TYPE: WFPROV["ProcessRun"],
            PROV_LABEL: "Run of workflow/packed.cwl#main/" + process_name,
        }
        # No start/end time on the activity itself; the start is expressed
        # through the wasStartedBy relation below
        self.document.activity(run_id, None, None, run_attributes)

        plan = "wf:main/" + process_name
        self.document.wasAssociatedWith(run_id, self.engine_uuid, plan)
        self.document.wasStartedBy(
            run_id, None, self.workflow_run_uri, when, None, None)
        return run_id

    def record_process_end(
        self,
        process_name: str,
        process_run_id: str,
        outputs: Union[CWLObjectType, MutableSequence[CWLObjectType], None],
        when: datetime.datetime,
    ) -> None:
        self.generate_output_prov(outputs, process_run_id, process_name)
        self.document.wasEndedBy(process_run_id, None, self.workflow_run_uri,
                                 when)

    def declare_file(
            self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, str]:
        """
        Declare a CWL File object, including its secondary files.

        The file content is looked up or stored in the research object in
        this order: existing data file matching a SHA1 ``checksum``, then the
        file at ``location``, then the literal ``contents``.

        :param value: CWL File object; ``checksum`` and ``@id`` are filled in
            when they are missing
        :return: tuple of the specialized (named) file entity, the data
            entity and the checksum
        :raises ValueError: if ``value`` is not of class File, if none of
            checksum/location/contents yields an entity, or if a secondary
            file is neither a File nor a Directory
        """
        if value["class"] != "File":
            raise ValueError("Must have class:File: %s" % value)

        # Need to determine file hash aka RO filename
        data_entity = None  # type: Optional[ProvEntity]
        checksum = None

        # 1) data already present in the RO under the given checksum
        if "checksum" in value:
            method, checksum = cast(str, value["checksum"]).split("$", 1)
            if method == SHA1 and self.research_object.has_data_file(checksum):
                data_entity = self.document.entity("data:" + checksum)

        # 2) otherwise copy the file at "location" into the RO
        if not data_entity and "location" in value:
            with self.fsaccess.open(str(value["location"]), "rb") as fhandle:
                stored_path = self.research_object.add_data_file(fhandle)
                # FIXME: This naively relies on add_data_file setting hash as filename
                checksum = PurePath(stored_path).name
                data_entity = self.document.entity(
                    "data:" + checksum, {PROV_TYPE: WFPROV["Artifact"]})
                if "checksum" not in value:
                    value["checksum"] = f"{SHA1}${checksum}"

        # 3) otherwise an anonymous file, add its content as a string
        if not data_entity and "contents" in value:
            data_entity, checksum = self.declare_string(
                cast(str, value["contents"]))

        # By here one of them should have worked!
        if not data_entity or not checksum:
            raise ValueError(
                "class:File but missing checksum/location/content: %r" % value)

        # The same content can be used under several names (mostly relevant
        # for secondaryFiles), so names are tracked on a specialized entity
        # identified by UUID rather than on the data entity itself
        file_id = value.setdefault("@id", uuid.uuid4().urn)
        file_entity = self.document.entity(
            file_id,
            [(PROV_TYPE, WFPROV["Artifact"]), (PROV_TYPE, WF4EVER["File"])],
        )  # type: ProvEntity

        for name_key in ("basename", "nameroot", "nameext"):
            if name_key in value:
                file_entity.add_attributes(
                    {CWLPROV[name_key]: value[name_key]})
        self.document.specializationOf(file_entity, data_entity)

        # Check for secondaries
        secondaries = cast(MutableSequence[CWLObjectType],
                           value.get("secondaryFiles", []))
        for sec in secondaries:
            # TODO: Record these in a specializationOf entity with UUID?
            sec_class = sec["class"]
            if sec_class == "File":
                sec_entity, _, _ = self.declare_file(sec)
            elif sec_class == "Directory":
                sec_entity = self.declare_directory(sec)
            else:
                raise ValueError(f"Got unexpected secondaryFiles value: {sec}")
            # How/when/where the secondary file was generated is unknown, but
            # by CWL convention it is a kind of summary/index derived from
            # the original file. It is generally in a different format, so
            # prov:Quotation would not be appropriate.
            self.document.derivation(
                sec_entity,
                file_entity,
                other_attributes={PROV["type"]: CWLPROV["SecondaryFile"]},
            )

        return file_entity, data_entity, checksum

    def declare_directory(self, value: CWLObjectType) -> ProvEntity:
        """
        Declare a Directory object as a collection, recursing into its listing.

        The directory is identified by ``value["@id"]``; a fresh UUID URN is
        stored there when it is missing. Every listing entry is declared with
        declare_artefact() and attached to the collection. An ORE description
        of the folder is built in a separate bundle, written as Turtle into
        the research object's metadata folder and registered as an annotation.

        :param value: CWL Directory object; mutated in place (``@id`` is set
            when absent, and get_listing() is called on it when it has no
            ``listing`` key)
        :return: the collection entity declared in ``self.document``
        """
        # FIXME: Calculate a hash-like identifier for directory
        # so we get same value if it's the same filenames/hashes
        # in a different location.
        # For now, mint a new UUID to identify this directory, but
        # attempt to keep it inside the value dictionary
        dir_id = cast(str, value.setdefault("@id", uuid.uuid4().urn))

        # New annotation file to keep the ORE Folder listing
        ore_doc_fn = dir_id.replace("urn:uuid:", "directory-") + ".ttl"
        dir_bundle = self.document.bundle(self.metadata_ns[ore_doc_fn])

        # The same identifier is declared twice: once in the main document
        # (coll) and once inside the bundle (coll_b)
        coll = self.document.entity(
            dir_id,
            [
                (PROV_TYPE, WFPROV["Artifact"]),
                (PROV_TYPE, PROV["Collection"]),
                (PROV_TYPE, PROV["Dictionary"]),
                (PROV_TYPE, RO["Folder"]),
            ],
        )
        # ORE description of ro:Folder, saved separately
        coll_b = dir_bundle.entity(
            dir_id,
            [(PROV_TYPE, RO["Folder"]), (PROV_TYPE, ORE["Aggregation"])],
        )
        self.document.mentionOf(dir_id + "#ore", dir_id, dir_bundle.identifier)

        # dir_manifest = dir_bundle.entity(
        #     dir_bundle.identifier, {PROV["type"]: ORE["ResourceMap"],
        #                             ORE["describes"]: coll_b.identifier})

        # Attributes are collected while iterating and added in one go below
        coll_attribs = [(ORE["isDescribedBy"], dir_bundle.identifier)]
        coll_b_attribs = []  # type: List[Tuple[Identifier, ProvEntity]]

        # FIXME: .listing might not be populated yet - hopefully
        # a later call to this method will sort that
        is_empty = True

        if "listing" not in value:
            # presumably fills in value["listing"] from the file system;
            # verify against get_listing()
            get_listing(self.fsaccess, value)
        for entry in cast(MutableSequence[CWLObjectType],
                          value.get("listing", [])):
            is_empty = False
            # Declare child-artifacts
            entity = self.declare_artefact(entry)
            self.document.membership(coll, entity)
            # Membership relation aka our ORE Proxy
            # (one fresh UUID, declared in both the document and the bundle)
            m_id = uuid.uuid4().urn
            m_entity = self.document.entity(m_id)
            m_b = dir_bundle.entity(m_id)

            # PROV-O style Dictionary
            # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
            # ..as prov.py do not currently allow PROV-N extensions
            # like hadDictionaryMember(..)
            m_entity.add_asserted_type(PROV["KeyEntityPair"])

            m_entity.add_attributes({
                PROV["pairKey"]: entry["basename"],
                PROV["pairEntity"]: entity,
            })

            # As well as a being a
            # http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
            m_b.add_asserted_type(RO["FolderEntry"])
            m_b.add_asserted_type(ORE["Proxy"])
            m_b.add_attributes({
                RO["entryName"]: entry["basename"],
                ORE["proxyIn"]: coll,
                ORE["proxyFor"]: entity,
            })
            coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
            coll_b_attribs.append((ORE["aggregates"], m_b))

        coll.add_attributes(coll_attribs)
        coll_b.add_attributes(coll_b_attribs)

        # Also Save ORE Folder as annotation metadata
        # (a throwaway document holding only the bundle, flattened before
        # it is serialized)
        ore_doc = ProvDocument()
        ore_doc.add_namespace(ORE)
        ore_doc.add_namespace(RO)
        ore_doc.add_namespace(UUID)
        ore_doc.add_bundle(dir_bundle)
        ore_doc = ore_doc.flattened()
        ore_doc_path = str(PurePosixPath(METADATA, ore_doc_fn))
        with self.research_object.write_bag_file(
                ore_doc_path) as provenance_file:
            ore_doc.serialize(provenance_file,
                              format="rdf",
                              rdf_format="turtle")
        self.research_object.add_annotation(dir_id, [ore_doc_fn],
                                            ORE["isDescribedBy"].uri)

        if is_empty:
            # Empty directory
            coll.add_asserted_type(PROV["EmptyCollection"])
            coll.add_asserted_type(PROV["EmptyDictionary"])
        self.research_object.add_uri(coll.identifier.uri)
        return coll

    def declare_string(self, value: str) -> Tuple[ProvEntity, str]:
        """
        Store a string as a plain-text data file and declare its entity.

        :param value: text to store; it is encoded with ``ENCODING``
        :return: tuple of the declared entity and the file name of the
            stored data file (used as checksum)
        """
        text = str(value)
        stored_path = PurePosixPath(
            self.research_object.add_data_file(
                BytesIO(text.encode(ENCODING)), content_type=TEXT_PLAIN))
        # FIXME: Don't naively assume add_data_file uses hash in filename!
        entity = self.document.entity(
            "data:%s" % stored_path.stem,
            {PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: text},
        )  # type: ProvEntity
        return entity, stored_path.name

    def declare_artefact(self, value: Optional[CWLOutputType]) -> ProvEntity:
        """
        Declare a PROV entity for an arbitrary CWL value and return it.

        Handled in this order: None, bool/int/float, str, bytes, mappings
        (File, Directory or any other dictionary) and finally any other
        iterable. Values that cannot be iterated are recorded by their
        repr(). Mappings and their nested values are visited recursively and
        get an ``@id`` assigned.
        """
        if value is None:
            # FIXME: If this can happen in CWL, we'll
            # need a better way to represent this in PROV
            return self.document.entity(CWLPROV["None"], {PROV_LABEL: "None"})

        if isinstance(value, (bool, int, float)):
            # Typically used in job documents for flags

            # FIXME: Make consistent hash URIs for these
            # that somehow include the type
            # (so "1" != 1 != "1.0" != true)
            scalar = self.document.entity(uuid.uuid4().urn,
                                          {PROV_VALUE: value})
            self.research_object.add_uri(scalar.identifier.uri)
            return scalar

        if isinstance(value, str):
            text_entity, _ = self.declare_string(value)
            return text_entity

        if isinstance(value, bytes):
            stored_path = self.research_object.add_data_file(BytesIO(value))
            # FIXME: Don't naively assume add_data_file uses hash in filename!
            return self.document.entity(
                "data:%s" % PurePosixPath(stored_path).stem,
                {
                    PROV_TYPE: WFPROV["Artifact"],
                    PROV_VALUE: str(value)
                },
            )

        if isinstance(value, MutableMapping):
            if "@id" in value:
                # Already processed this value, but it might not be in this PROV
                known = self.document.get_record(value["@id"])
                if known:
                    return known[0]
                # else, unknown in PROV, re-add below as if it's fresh

            data_class = value.get("class")

            # Base case - we found a File we need to update
            if data_class == "File":
                file_entity, _, _ = self.declare_file(value)
                value["@id"] = file_entity.identifier.uri
                return file_entity

            if data_class == "Directory":
                dir_entity = self.declare_directory(value)
                value["@id"] = dir_entity.identifier.uri
                return dir_entity

            # some other kind of dictionary?
            # TODO: also Save as JSON
            dictionary = self.document.entity(
                value.setdefault("@id", uuid.uuid4().urn),
                [
                    (PROV_TYPE, WFPROV["Artifact"]),
                    (PROV_TYPE, PROV["Collection"]),
                    (PROV_TYPE, PROV["Dictionary"]),
                ],
            )

            if data_class:
                _logger.warning("Unknown data class %s.", data_class)
                # FIXME: The class might be "http://example.com/somethingelse"
                dictionary.add_asserted_type(CWLPROV[data_class])

            # Let's iterate and recurse
            pairs = []  # type: List[Tuple[Identifier, ProvEntity]]
            for key, nested in value.items():
                nested_entity = self.declare_artefact(nested)
                self.document.membership(dictionary, nested_entity)
                # Note: only support PROV-O style dictionary
                # https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
                # as prov.py do not easily allow PROV-N extensions
                pair = self.document.entity(uuid.uuid4().urn)
                pair.add_asserted_type(PROV["KeyEntityPair"])
                pair.add_attributes({
                    PROV["pairKey"]: str(key),
                    PROV["pairEntity"]: nested_entity
                })
                pairs.append((PROV["hadDictionaryMember"], pair))
            dictionary.add_attributes(pairs)
            self.research_object.add_uri(dictionary.identifier.uri)
            return dictionary

        # some other kind of Collection?
        # TODO: also save as JSON
        try:
            # Recurse and register any nested objects
            members = [self.declare_artefact(item) for item in value]

            # If we reached this, then we were allowed to iterate
            collection = self.document.entity(
                uuid.uuid4().urn,
                [
                    (PROV_TYPE, WFPROV["Artifact"]),
                    (PROV_TYPE, PROV["Collection"]),
                ],
            )
            if not members:
                collection.add_asserted_type(PROV["EmptyCollection"])
            for member in members:
                # FIXME: This won't preserve order, for that
                # we would need to use PROV.Dictionary
                # with numeric keys
                self.document.membership(collection, member)
            self.research_object.add_uri(collection.identifier.uri)
            # FIXME: list value does not support adding "@id"
            return collection
        except TypeError:
            _logger.warning("Unrecognized type %s of %r", type(value), value)
            # Let's just fall back to Python repr()
            fallback = self.document.entity(uuid.uuid4().urn,
                                            {PROV_LABEL: repr(value)})
            self.research_object.add_uri(fallback.identifier.uri)
            return fallback

    def used_artefacts(
        self,
        job_order: Union[CWLObjectType, List[CWLObjectType]],
        process_run_id: str,
        name: Optional[str] = None,
    ) -> None:
        """Add used() for each data artefact."""
        if isinstance(job_order, list):
            for entry in job_order:
                self.used_artefacts(entry, process_run_id, name)
        else:
            # FIXME: Use workflow name in packed.cwl, "main" is wrong for nested workflows
            base = "main"
            if name is not None:
                base += "/" + name
            for key, value in job_order.items():
                prov_role = self.wf_ns[f"{base}/{key}"]
                try:
                    entity = self.declare_artefact(value)
                    self.document.used(
                        process_run_id,
                        entity,
                        datetime.datetime.now(),
                        None,
                        {"prov:role": prov_role},
                    )
                except OSError:
                    pass

    def generate_output_prov(
        self,
        final_output: Union[CWLObjectType, MutableSequence[CWLObjectType],
                            None],
        process_run_id: Optional[str],
        name: Optional[str],
    ) -> None:
        """Call wasGeneratedBy() for each output,copy the files into the RO."""
        if isinstance(final_output, MutableSequence):
            for entry in final_output:
                self.generate_output_prov(entry, process_run_id, name)
        elif final_output is not None:
            # Timestamp should be created at the earliest
            timestamp = datetime.datetime.now()

            # For each output, find/register the corresponding
            # entity (UUID) and document it as generated in
            # a role corresponding to the output
            for output, value in final_output.items():
                entity = self.declare_artefact(value)
                if name is not None:
                    name = urllib.parse.quote(str(name), safe=":/,#")
                    # FIXME: Probably not "main" in nested workflows
                    role = self.wf_ns[f"main/{name}/{output}"]
                else:
                    role = self.wf_ns["main/%s" % output]

                if not process_run_id:
                    process_run_id = self.workflow_run_uri

                self.document.wasGeneratedBy(entity, process_run_id, timestamp,
                                             None, {"prov:role": role})

    def prospective_prov(self, job: JobsType) -> None:
        """
        Create prospective prov recording as wfdesc prov:Plan.

        ``wf:main`` is declared as a wfdesc Workflow for a WorkflowJob and as
        a wfdesc Process for anything else (direct command line tool
        execution). For a workflow, every step is declared as a plan of its
        own and attached to ``wf:main`` as a sub-process.
        """
        is_workflow = isinstance(job, WorkflowJob)
        main_type = WFDESC["Workflow"] if is_workflow else WFDESC["Process"]
        self.document.entity(
            "wf:main",
            {
                PROV_TYPE: main_type,
                "prov:type": PROV["Plan"],
                "prov:label": "Prospective provenance",
            },
        )
        if not is_workflow:
            # direct command line tool execution, no steps to describe
            return

        for step in job.steps:
            # The first five characters of the step name are dropped
            # (presumably a fixed prefix -- verify against the step naming)
            step_id = urllib.parse.quote(
                "wf:main/" + str(step.name)[5:], safe=":/,#")
            step_plan = self.document.entity(
                step_id,
                {
                    PROV_TYPE: WFDESC["Process"],
                    "prov:type": PROV["Plan"]
                },
            )
            # Declaring wf:main again adds the sub-process to it
            self.document.entity(
                "wf:main",
                {
                    "wfdesc:hasSubProcess": step_plan,
                    "prov:label": "Prospective provenance",
                },
            )
        # TODO: Declare roles/parameters as well

    def activity_has_provenance(self, activity, prov_ids):
        # type: (str, List[Identifier]) -> None
        """
        Add http://www.w3.org/TR/prov-aq/ relations to nested PROV files.

        The activity gets one ``prov:has_provenance`` attribute per
        identifier, and the same links are added to the research object as
        an annotation on the activity.
        """
        has_provenance = PROV["has_provenance"]
        # NOTE: The below will only work if the corresponding metadata/provenance arcp URI
        # is a pre-registered namespace in the PROV Document
        self.document.activity(
            activity,
            other_attributes=[(has_provenance, prov_id)
                              for prov_id in prov_ids],
        )
        # Tip: we can't use https://www.w3.org/TR/prov-links/#term-mention
        # as prov:mentionOf() is only for entities, not activities
        self.research_object.add_annotation(
            activity,
            [prov_id.uri for prov_id in prov_ids],
            has_provenance.uri,
        )

    def finalize_prov_profile(self, name):
        # type: (Optional[str]) -> List[Identifier]
        """
        Serialize the PROV document into the research object.

        The document is written in six formats (PROV-XML, PROV-N, PROV-JSON,
        Turtle, N-Triples and JSON-LD) below the provenance folder.

        :param name: workflow name; None selects the fixed file name used
            for the main workflow
        :return: identifiers of the written provenance files, in the order
            they were written
        """
        # NOTE: Relative posix path
        if name is None:
            # main workflow, fixed filenames
            filename = "primary.cwlprov"
        else:
            # ASCII-friendly filename, avoiding % as we don't want %2520 in manifest.json
            wf_name = urllib.parse.quote(str(name), safe="").replace("%", "_")
            # Note that the above could cause overlaps for similarly named
            # workflows, but that's OK as we'll also include run uuid
            # which also covers the case of this step being run in
            # multiple places or iterations
            filename = f"{wf_name}.{self.workflow_run_uuid}.cwlprov"

        basename = str(PurePosixPath(PROVENANCE) / filename)

        # TODO: Also support other profiles than CWLProv, e.g. ProvOne

        # File suffix -> keyword arguments for ProvDocument.serialize()
        serializations = (
            # https://www.w3.org/TR/prov-xml/
            (".xml", {"format": "xml", "indent": 4}),
            # https://www.w3.org/TR/prov-n/
            (".provn", {"format": "provn", "indent": 2}),
            # https://www.w3.org/Submission/prov-json/
            (".json", {"format": "json", "indent": 2}),
            # "rdf" aka https://www.w3.org/TR/prov-o/
            # which can be serialized to ttl/nt/jsonld (and more!)
            # https://www.w3.org/TR/turtle/
            (".ttl", {"format": "rdf", "rdf_format": "turtle"}),
            # https://www.w3.org/TR/n-triples/
            (".nt", {"format": "rdf", "rdf_format": "ntriples"}),
            # https://www.w3.org/TR/json-ld/
            # TODO: Use a nice JSON-LD context
            # see also https://eprints.soton.ac.uk/395985/
            # 404 Not Found on https://provenance.ecs.soton.ac.uk/prov.jsonld :(
            (".jsonld", {"format": "rdf", "rdf_format": "json-ld"}),
        )

        # list of prov identifiers of provenance files
        prov_ids = []
        for suffix, options in serializations:
            with self.research_object.write_bag_file(
                    basename + suffix) as provenance_file:
                self.document.serialize(provenance_file, **options)
                prov_ids.append(self.provenance_ns[filename + suffix])

        _logger.debug("[provenance] added provenance: %s", prov_ids)
        return prov_ids
Beispiel #26
0
class NIDMExporter():
    """
    Generic class to parse a result directory to extract the pieces of
    information to be stored in NIDM-Results and to generate a NIDM-Results
    export.
    """
    def __init__(self, version, out_dir, zipped=True):
        out_dirname = os.path.basename(out_dir)
        out_path = os.path.dirname(out_dir)

        # Create output path from output name
        self.zipped = zipped
        if not self.zipped:
            out_dirname = out_dirname + ".nidm"
        else:
            out_dirname = out_dirname + ".nidm.zip"
        out_dir = os.path.join(out_path, out_dirname)

        # Quit if output path already exists and user doesn't want to overwrite
        # it
        if os.path.exists(out_dir):
            msg = out_dir + " already exists, overwrite?"
            if not input("%s (y/N) " % msg).lower() == 'y':
                quit("Bye.")
            if os.path.isdir(out_dir):
                shutil.rmtree(out_dir)
            else:
                os.remove(out_dir)
        self.out_dir = out_dir

        if version == "dev":
            self.version = {
                'major': 10000,
                'minor': 0,
                'revision': 0,
                'num': version
            }
        else:
            major, minor, revision = version.split(".")
            if "-rc" in revision:
                revision, rc = revision.split("-rc")
            else:
                rc = -1
            self.version = {
                'major': int(major),
                'minor': int(minor),
                'revision': int(revision),
                'rc': int(rc),
                'num': version
            }

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()

        # A temp directory that will contain the exported data
        self.export_dir = tempfile.mkdtemp(prefix="nidm-", dir=out_path)

        self.prepend_path = ''

    def parse(self):
        """
        Parse a result directory to extract the pieces information to be
        stored in NIDM-Results.
        """

        try:
            # Methods: find_software, find_model_fitting, find_contrasts and
            # find_inferences should be defined in the children classes and
            # return a list of NIDM Objects as specified in the objects module

            # Object of type Software describing the neuroimaging software
            # package used for the analysis
            self.software = self._find_software()

            # List of objects of type ModelFitting describing the
            # model fitting step in NIDM-Results (main activity: Model
            # Parameters Estimation)
            self.model_fittings = self._find_model_fitting()

            # Dictionary of (key, value) pairs where where key is a tuple
            # containing the identifier of a ModelParametersEstimation object
            # and a tuple of identifiers of ParameterEstimateMap objects and
            # value is an object of type Contrast describing the contrast
            # estimation step in NIDM-Results (main activity: Contrast
            # Estimation)
            self.contrasts = self._find_contrasts()

            # Inference activity and entities
            # Dictionary of (key, value) pairs where key is the identifier of a
            # ContrastEstimation object and value is an object of type
            # Inference describing the inference step in NIDM-Results (main
            # activity: Inference)
            self.inferences = self._find_inferences()
        except Exception:
            self.cleanup()
            raise

    def cleanup(self):
        if os.path.isdir(self.export_dir):
            shutil.rmtree(self.export_dir)

    def add_object(self, nidm_object, export_file=True):
        """
        Export a NIDMObject and register it in the NIDM-Results bundle.

        :param nidm_object: object to export; its 'prov_type' decides whether
            it is recorded as an activity, an entity or an agent. Objects of
            any other type are exported but not added to the bundle.
        :param export_file: if False, the object is exported with no export
            directory (None) instead of the temporary export directory.
        """
        target_dir = self.export_dir if export_file else None

        # NIDMFile objects additionally receive the path prefix
        if isinstance(nidm_object, NIDMFile):
            nidm_object.export(self.version, target_dir, self.prepend_path)
        else:
            nidm_object.export(self.version, target_dir)

        # ProvDocument: pick the bundle method matching the PROV type
        prov_type = nidm_object.prov_type
        if prov_type == PROV['Activity']:
            add_record = self.bundle.activity
        elif prov_type == PROV['Entity']:
            add_record = self.bundle.entity
        elif prov_type == PROV['Agent']:
            add_record = self.bundle.agent
        else:
            return

        add_record(nidm_object.id, other_attributes=nidm_object.attributes)

    def export(self):
        """
        Generate a NIDM-Results export.

        Reads self.software, self.model_fittings, self.contrasts and
        self.inferences (the attributes set by parse()) and, in that order:

        1. creates the export directory if needed and initialises the bundle;
        2. adds the software agent;
        3. adds every model fitting step (design matrix, data, error model,
           parameter estimate maps, residual/resels/mask/grand mean maps and
           the estimation activity);
        4. adds every contrast estimation step;
        5. adds every inference step;
        6. writes the provenance files via save_prov_to_files().

        Relations (used, wasGeneratedBy, wasDerivedFrom, ...) are recorded
        directly on self.bundle; objects are added through add_object().

        :return: path of the export (self.out_dir).
        :raises Exception: any exception raised along the way is re-raised
            after the temporary export directory has been removed.
        """
        try:
            if not os.path.isdir(self.export_dir):
                os.mkdir(self.export_dir)

            # Initialise main bundle
            self._create_bundle(self.version)

            self.add_object(self.software)

            # Add model fitting steps
            # model_fittings may be given as a dictionary; only its values are
            # needed from here on
            if not isinstance(self.model_fittings, list):
                self.model_fittings = list(self.model_fittings.values())

            for model_fitting in self.model_fittings:
                # Design Matrix
                # model_fitting.activity.used(model_fitting.design_matrix)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.design_matrix.id)
                self.add_object(model_fitting.design_matrix)
                # *** Export visualisation of the design matrix
                self.add_object(model_fitting.design_matrix.image)

                if model_fitting.design_matrix.image.file is not None:
                    self.add_object(model_fitting.design_matrix.image.file)

                # NOTE(review): the guard tests 'hrf_models' but the object
                # added is 'drift_model' -- confirm this is intended
                if model_fitting.design_matrix.hrf_models is not None:
                    # drift model
                    self.add_object(model_fitting.design_matrix.drift_model)

                # Machine and subjects are only exported for versions >= 1.3
                if self.version['major'] > 1 or \
                        (self.version['major'] == 1 and
                         self.version['minor'] >= 3):
                    # Machine
                    # model_fitting.data.wasAttributedTo(model_fitting.machine)
                    self.bundle.wasAttributedTo(model_fitting.data.id,
                                                model_fitting.machine.id)
                    self.add_object(model_fitting.machine)

                    # Imaged subject or group(s)
                    for sub in model_fitting.subjects:
                        self.add_object(sub)
                        # model_fitting.data.wasAttributedTo(sub)
                        self.bundle.wasAttributedTo(model_fitting.data.id,
                                                    sub.id)

                # Data
                # model_fitting.activity.used(model_fitting.data)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.data.id)
                self.add_object(model_fitting.data)

                # Error Model
                # model_fitting.activity.used(model_fitting.error_model)
                self.bundle.used(model_fitting.activity.id,
                                 model_fitting.error_model.id)
                self.add_object(model_fitting.error_model)

                # Parameter Estimate Maps
                for param_estimate in model_fitting.param_estimates:
                    # param_estimate.wasGeneratedBy(model_fitting.activity)
                    self.bundle.wasGeneratedBy(param_estimate.id,
                                               model_fitting.activity.id)
                    self.add_object(param_estimate)
                    self.add_object(param_estimate.coord_space)
                    self.add_object(param_estimate.file)

                    # The file of the object a map was derived from is added
                    # with export_file=False (here and in the blocks below)
                    if param_estimate.derfrom is not None:
                        self.bundle.wasDerivedFrom(param_estimate.id,
                                                   param_estimate.derfrom.id)
                        self.add_object(param_estimate.derfrom)
                        self.add_object(param_estimate.derfrom.file,
                                        export_file=False)

                # Residual Mean Squares Map
                # model_fitting.rms_map.wasGeneratedBy(model_fitting.activity)
                self.add_object(model_fitting.rms_map)
                self.bundle.wasGeneratedBy(model_fitting.rms_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.rms_map.coord_space)
                self.add_object(model_fitting.rms_map.file)
                if model_fitting.rms_map.derfrom is not None:
                    self.bundle.wasDerivedFrom(
                        model_fitting.rms_map.id,
                        model_fitting.rms_map.derfrom.id)
                    self.add_object(model_fitting.rms_map.derfrom)
                    self.add_object(model_fitting.rms_map.derfrom.file,
                                    export_file=False)

                # Resels per Voxel Map (optional)
                if model_fitting.rpv_map is not None:
                    self.add_object(model_fitting.rpv_map)
                    self.bundle.wasGeneratedBy(model_fitting.rpv_map.id,
                                               model_fitting.activity.id)
                    self.add_object(model_fitting.rpv_map.coord_space)
                    self.add_object(model_fitting.rpv_map.file)
                    # When 'inf_id' is set, that activity is recorded as
                    # using the resels per voxel map
                    if model_fitting.rpv_map.inf_id is not None:
                        self.bundle.used(model_fitting.rpv_map.inf_id,
                                         model_fitting.rpv_map.id)
                    if model_fitting.rpv_map.derfrom is not None:
                        self.bundle.wasDerivedFrom(
                            model_fitting.rpv_map.id,
                            model_fitting.rpv_map.derfrom.id)
                        self.add_object(model_fitting.rpv_map.derfrom)
                        self.add_object(model_fitting.rpv_map.derfrom.file,
                                        export_file=False)

                # Mask
                # model_fitting.mask_map.wasGeneratedBy(model_fitting.activity)
                self.bundle.wasGeneratedBy(model_fitting.mask_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.mask_map)
                if model_fitting.mask_map.derfrom is not None:
                    self.bundle.wasDerivedFrom(
                        model_fitting.mask_map.id,
                        model_fitting.mask_map.derfrom.id)
                    self.add_object(model_fitting.mask_map.derfrom)
                    self.add_object(model_fitting.mask_map.derfrom.file,
                                    export_file=False)

                # Create coordinate space export
                self.add_object(model_fitting.mask_map.coord_space)
                # Create "Mask map" entity
                self.add_object(model_fitting.mask_map.file)

                # Grand Mean map
                # model_fitting.grand_mean_map.wasGeneratedBy(model_fitting.activity)
                self.bundle.wasGeneratedBy(model_fitting.grand_mean_map.id,
                                           model_fitting.activity.id)
                self.add_object(model_fitting.grand_mean_map)
                # Coordinate space entity
                self.add_object(model_fitting.grand_mean_map.coord_space)
                # Grand Mean Map entity
                self.add_object(model_fitting.grand_mean_map.file)

                # Model Parameters Estimation activity
                self.add_object(model_fitting.activity)
                self.bundle.wasAssociatedWith(model_fitting.activity.id,
                                              self.software.id)
                # model_fitting.activity.wasAssociatedWith(self.software)
                # self.add_object(model_fitting)

            # Add contrast estimation steps
            # analysis_masks maps each contrast estimation id to the id of the
            # mask map of its model fitting; it is read back in the inference
            # step below
            analysis_masks = dict()
            for (model_fitting_id,
                 pe_ids), contrasts in list(self.contrasts.items()):
                for contrast in contrasts:
                    model_fitting = self._get_model_fitting(model_fitting_id)
                    # for contrast in contrasts:
                    # contrast.estimation.used(model_fitting.rms_map)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.rms_map.id)
                    # contrast.estimation.used(model_fitting.mask_map)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.mask_map.id)
                    analysis_masks[contrast.estimation.id] = \
                        model_fitting.mask_map.id
                    self.bundle.used(contrast.estimation.id,
                                     contrast.weights.id)
                    self.bundle.used(contrast.estimation.id,
                                     model_fitting.design_matrix.id)
                    # contrast.estimation.wasAssociatedWith(self.software)
                    self.bundle.wasAssociatedWith(contrast.estimation.id,
                                                  self.software.id)

                    for pe_id in pe_ids:
                        # contrast.estimation.used(pe_id)
                        self.bundle.used(contrast.estimation.id, pe_id)

                    # Create estimation activity
                    self.add_object(contrast.estimation)

                    # Create contrast weights
                    self.add_object(contrast.weights)

                    if contrast.contrast_map is not None:
                        # Create contrast Map
                        # contrast.contrast_map.wasGeneratedBy(contrast.estimation)
                        self.bundle.wasGeneratedBy(contrast.contrast_map.id,
                                                   contrast.estimation.id)
                        self.add_object(contrast.contrast_map)
                        self.add_object(contrast.contrast_map.coord_space)
                        # Copy contrast map in export directory
                        self.add_object(contrast.contrast_map.file)

                        if contrast.contrast_map.derfrom is not None:
                            self.bundle.wasDerivedFrom(
                                contrast.contrast_map.id,
                                contrast.contrast_map.derfrom.id)
                            self.add_object(contrast.contrast_map.derfrom)
                            self.add_object(contrast.contrast_map.derfrom.file,
                                            export_file=False)

                    # Create Std Err. Map (T-tests) or Explained Mean Sq. Map
                    # (F-tests)
                    # contrast.stderr_or_expl_mean_sq_map.wasGeneratedBy
                    # (contrast.estimation)
                    stderr_explmeansq_map = (
                        contrast.stderr_or_expl_mean_sq_map)
                    self.bundle.wasGeneratedBy(stderr_explmeansq_map.id,
                                               contrast.estimation.id)
                    self.add_object(stderr_explmeansq_map)
                    self.add_object(stderr_explmeansq_map.coord_space)
                    # A standard error map that carries a contrast variance
                    # map is recorded as derived from it
                    if isinstance(stderr_explmeansq_map,
                                  ContrastStdErrMap) and \
                            stderr_explmeansq_map.contrast_var:
                        self.add_object(stderr_explmeansq_map.contrast_var)
                        if stderr_explmeansq_map.var_coord_space:
                            self.add_object(
                                stderr_explmeansq_map.var_coord_space)
                        if stderr_explmeansq_map.contrast_var.coord_space:
                            self.add_object(
                                stderr_explmeansq_map.contrast_var.coord_space)
                        self.add_object(
                            stderr_explmeansq_map.contrast_var.file,
                            export_file=False)
                        self.bundle.wasDerivedFrom(
                            stderr_explmeansq_map.id,
                            stderr_explmeansq_map.contrast_var.id)
                    self.add_object(stderr_explmeansq_map.file)

                    # Create Statistic Map
                    # contrast.stat_map.wasGeneratedBy(contrast.estimation)
                    self.bundle.wasGeneratedBy(contrast.stat_map.id,
                                               contrast.estimation.id)
                    self.add_object(contrast.stat_map)
                    self.add_object(contrast.stat_map.coord_space)
                    # Copy Statistical map in export directory
                    self.add_object(contrast.stat_map.file)

                    if contrast.stat_map.derfrom is not None:
                        self.bundle.wasDerivedFrom(
                            contrast.stat_map.id, contrast.stat_map.derfrom.id)
                        self.add_object(contrast.stat_map.derfrom)
                        self.add_object(contrast.stat_map.derfrom.file,
                                        export_file=False)

                    # Create Z Statistic Map
                    if contrast.z_stat_map:
                        # contrast.z_stat_map.wasGeneratedBy(contrast.estimation)
                        self.bundle.wasGeneratedBy(contrast.z_stat_map.id,
                                                   contrast.estimation.id)
                        self.add_object(contrast.z_stat_map)
                        self.add_object(contrast.z_stat_map.coord_space)
                        # Copy Statistical map in export directory
                        self.add_object(contrast.z_stat_map.file)

                    # self.add_object(contrast)

            # Add inference steps
            for contrast_id, inferences in list(self.inferences.items()):
                contrast = self._get_contrast(contrast_id)

                for inference in inferences:
                    # The inference uses the Z statistic map when the contrast
                    # has one, the statistic map otherwise
                    if contrast.z_stat_map:
                        used_id = contrast.z_stat_map.id
                    else:
                        used_id = contrast.stat_map.id
                    # inference.inference_act.used(used_id)
                    self.bundle.used(inference.inference_act.id, used_id)
                    # inference.inference_act.wasAssociatedWith(self.software)
                    self.bundle.wasAssociatedWith(inference.inference_act.id,
                                                  self.software.id)

                    # self.add_object(inference)
                    # Excursion set
                    # inference.excursion_set.wasGeneratedBy(inference.inference_act)
                    self.bundle.wasGeneratedBy(inference.excursion_set.id,
                                               inference.inference_act.id)
                    self.add_object(inference.excursion_set)
                    self.add_object(inference.excursion_set.coord_space)
                    if inference.excursion_set.visu is not None:
                        self.add_object(inference.excursion_set.visu)
                        if inference.excursion_set.visu.file is not None:
                            self.add_object(inference.excursion_set.visu.file)
                    # Copy "Excursion set map" file in export directory
                    self.add_object(inference.excursion_set.file)
                    if inference.excursion_set.clust_map is not None:
                        self.add_object(inference.excursion_set.clust_map)
                        self.add_object(inference.excursion_set.clust_map.file)
                        self.add_object(
                            inference.excursion_set.clust_map.coord_space)

                    if inference.excursion_set.mip is not None:
                        self.add_object(inference.excursion_set.mip)
                        self.add_object(inference.excursion_set.mip.file)

                    # Height threshold (and its equivalent thresholds, if any)
                    if inference.height_thresh.equiv_thresh is not None:
                        for equiv in inference.height_thresh.equiv_thresh:
                            self.add_object(equiv)
                    self.add_object(inference.height_thresh)

                    # Extent threshold (and its equivalent thresholds, if any)
                    if inference.extent_thresh.equiv_thresh is not None:
                        for equiv in inference.extent_thresh.equiv_thresh:
                            self.add_object(equiv)
                    self.add_object(inference.extent_thresh)

                    # Display Mask (potentially more than 1)
                    if inference.disp_mask:
                        for mask in inference.disp_mask:
                            # inference.inference_act.used(mask)
                            self.bundle.used(inference.inference_act.id,
                                             mask.id)
                            self.add_object(mask)
                            # Create coordinate space entity
                            self.add_object(mask.coord_space)
                            # Create "Display Mask Map" entity
                            self.add_object(mask.file)

                            if mask.derfrom is not None:
                                self.bundle.wasDerivedFrom(
                                    mask.id, mask.derfrom.id)
                                self.add_object(mask.derfrom)
                                self.add_object(mask.derfrom.file,
                                                export_file=False)

                    # Search Space
                    self.bundle.wasGeneratedBy(inference.search_space.id,
                                               inference.inference_act.id)
                    # inference.search_space.wasGeneratedBy(inference.inference_act)
                    self.add_object(inference.search_space)
                    self.add_object(inference.search_space.coord_space)
                    # Copy "Mask map" in export directory
                    self.add_object(inference.search_space.file)

                    # Peak Definition
                    if inference.peak_criteria:
                        # inference.inference_act.used(inference.peak_criteria)
                        self.bundle.used(inference.inference_act.id,
                                         inference.peak_criteria.id)
                        self.add_object(inference.peak_criteria)

                    # Cluster Definition
                    if inference.cluster_criteria:
                        # inference.inference_act.used(inference.cluster_criteria)
                        self.bundle.used(inference.inference_act.id,
                                         inference.cluster_criteria.id)
                        self.add_object(inference.cluster_criteria)

                    if inference.clusters:
                        # Clusters and peaks: clusters derive from the
                        # excursion set; peaks and centre of gravity derive
                        # from their cluster
                        for cluster in inference.clusters:
                            # cluster.wasDerivedFrom(inference.excursion_set)
                            self.bundle.wasDerivedFrom(
                                cluster.id, inference.excursion_set.id)
                            self.add_object(cluster)
                            for peak in cluster.peaks:
                                self.bundle.wasDerivedFrom(peak.id, cluster.id)
                                self.add_object(peak)
                                self.add_object(peak.coordinate)

                            if cluster.cog is not None:
                                self.bundle.wasDerivedFrom(
                                    cluster.cog.id, cluster.id)
                                self.add_object(cluster.cog)
                                self.add_object(cluster.cog.coordinate)

                    # Inference activity
                    # inference.inference_act.wasAssociatedWith(inference.software_id)
                    # inference.inference_act.used(inference.height_thresh)
                    self.bundle.used(inference.inference_act.id,
                                     inference.height_thresh.id)
                    # inference.inference_act.used(inference.extent_thresh)
                    self.bundle.used(inference.inference_act.id,
                                     inference.extent_thresh.id)
                    # The inference also uses the mask map recorded for this
                    # contrast during the contrast estimation step
                    self.bundle.used(inference.inference_act.id,
                                     analysis_masks[contrast.estimation.id])
                    self.add_object(inference.inference_act)

            # Write-out prov file
            self.save_prov_to_files()

            return self.out_dir
        except Exception:
            # Do not leave the temporary export directory behind on failure
            self.cleanup()
            raise

    def _get_model_fitting(self, mf_id):
        """
        Retreive model fitting with identifier 'mf_id' from the list of model
        fitting objects stored in self.model_fitting
        """
        for model_fitting in self.model_fittings:
            if model_fitting.activity.id == mf_id:
                return model_fitting

        raise Exception("Model fitting activity with id: " + str(mf_id) +
                        " not found.")

    def _get_contrast(self, con_id):
        """
        Retreive contrast with identifier 'con_id' from the list of contrast
        objects stored in self.contrasts
        """
        for contrasts in list(self.contrasts.values()):
            for contrast in contrasts:
                if contrast.estimation.id == con_id:
                    return contrast
        raise Exception("Contrast activity with id: " + str(con_id) +
                        " not found.")

    def _add_namespaces(self):
        """
        Register on the prov document the namespaces used by the export.
        """
        # Registration order is kept as listed
        namespaces = (
            NIDM,
            NIIRI,
            CRYPTO,
            DCT,
            DC,
            NFO,
            OBO,
            SCR,
            NIF,
        )
        for namespace in namespaces:
            self.doc.add_namespace(namespace)

    def _create_bundle(self, version):
        """
        Initialise the NIDM-Results bundle and describe how it was produced.

        Creates self.bundle and records on the document the bundle entity and
        its generation. For versions other than "1.0.0" and "1.1.0" the
        export activity and the exporter software agent are recorded as well.
        The attributes 'bundle_ent', 'export_act', 'export_time' and
        'exporter' are only created when not already present on the instance.

        :param version: version dictionary; only its 'num' entry is read.
        """
        # Export activity and exporter agent are skipped for these versions
        legacy = version['num'] in ["1.0.0", "1.1.0"]

        # *** Bundle entity
        if not hasattr(self, 'bundle_ent'):
            self.bundle_ent = NIDMResultsBundle(nidm_version=version['num'])
        bundle_ent = self.bundle_ent

        self.bundle = ProvBundle(identifier=bundle_ent.id)
        bundle_ent.export(self.version, self.export_dir)
        self.doc.entity(bundle_ent.id,
                        other_attributes=bundle_ent.attributes)

        # *** NIDM-Results Export Activity
        if not legacy:
            if not hasattr(self, 'export_act'):
                self.export_act = NIDMResultsExport()
            export_act = self.export_act
            export_act.export(self.version, self.export_dir)
            self.doc.activity(export_act.id,
                              other_attributes=export_act.attributes)

        # *** Bundle was generated (by the export activity when there is one)
        if not hasattr(self, 'export_time'):
            self.export_time = str(datetime.datetime.now().time())

        generation = {'entity': self.bundle_ent.id, 'time': self.export_time}
        if not legacy:
            generation['activity'] = self.export_act.id
        self.doc.wasGeneratedBy(**generation)

        if legacy:
            return

        # *** NIDM-Results Exporter (Software Agent)
        if not hasattr(self, 'exporter'):
            self.exporter = self._get_exporter()
        exporter = self.exporter
        exporter.export(self.version, self.export_dir)
        self.doc.agent(exporter.id, other_attributes=exporter.attributes)

        self.doc.wasAssociatedWith(self.export_act.id, exporter.id)

    def _get_model_parameters_estimations(self, error_model):
        """
        Build a ModelParametersEstimation whose estimation method is inferred
        from 'error_model'.

        Independent error with homogeneous variance gives STATO_OLS,
        independent error otherwise gives STATO_WLS, and any other error
        dependence gives STATO_GLS.
        """
        if error_model.dependance == NIDM_INDEPEDENT_ERROR:
            method = STATO_OLS if error_model.variance_homo else STATO_WLS
        else:
            method = STATO_GLS

        return ModelParametersEstimation(method, self.software.id)

    def use_prefixes(self, ttl):
        prefix_file = os.path.join(os.path.dirname(__file__), 'prefixes.csv')
        context = dict()
        with open(prefix_file, encoding="ascii") as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # skip the headers
            for alphanum_id, prefix, uri in reader:
                if alphanum_id in ttl:
                    context[prefix] = uri
                    ttl = "@prefix " + prefix + ": <" + uri + "> .\n" + ttl
                    ttl = ttl.replace(alphanum_id, prefix + ":")
                    if uri in ttl:
                        ttl = ttl.replace(alphanum_id, prefix + ":")
                elif uri in ttl:
                    context[prefix] = uri
                    ttl = "@prefix " + prefix + ": <" + uri + "> .\n" + ttl
                    ttl = ttl.replace(alphanum_id, prefix + ":")
        return (ttl, context)

    def save_prov_to_files(self, showattributes=False):
        """
        Serialise the prov document and move the export to its final place.

        Adds the bundle to the document, then writes in the temporary export
        directory a Turtle serialisation ('nidm.ttl') and a JSON-LD
        serialisation ('nidm.json'). Finally, for a directory export the
        temporary directory is renamed to the output path; for a zipped
        export its content is packed into the output archive and the
        temporary directory is deleted.

        The current working directory is left untouched.

        :param showattributes: unused; kept for backward compatibility.
        """
        self.doc.add_bundle(self.bundle)

        ttl_file = os.path.join(self.export_dir, 'nidm.ttl')
        ttl_txt = self.doc.serialize(format='rdf', rdf_format='turtle')
        ttl_txt, json_context = self.use_prefixes(ttl_txt)

        # Add namespaces to json-ld context
        for namespace in self.doc._namespaces.get_registered_namespaces():
            json_context[namespace._prefix] = namespace._uri
        for namespace in \
                list(self.doc._namespaces._default_namespaces.values()):
            json_context[namespace._prefix] = namespace._uri
        # NOTE(review): 'xsd' is mapped to the RDF Schema namespace here; the
        # XML Schema namespace is http://www.w3.org/2001/XMLSchema# --
        # confirm whether this is intended before changing the output
        json_context["xsd"] = "http://www.w3.org/2000/01/rdf-schema#"

        # Work-around to issue with INF value in rdflib (reported in
        # https://github.com/RDFLib/rdflib/pull/655)
        ttl_txt = ttl_txt.replace(' inf ', ' "INF"^^xsd:float ')
        with open(ttl_file, 'w') as ttl_fid:
            ttl_fid.write(ttl_txt)

        jsonld_file = os.path.join(self.export_dir, 'nidm.json')
        jsonld_txt = self.doc.serialize(format='rdf',
                                        rdf_format='json-ld',
                                        context=json_context)
        with open(jsonld_file, 'w') as jsonld_fid:
            jsonld_fid.write(jsonld_txt)

        # Post-processing
        if not self.zipped:
            # Just rename temp directory to output_path
            os.rename(self.export_dir, self.out_dir)
            return

        # Create a zip file that contains the content of the temp directory.
        # Explicit paths are used instead of changing the working directory,
        # so relative output paths resolve correctly and the caller's working
        # directory is preserved.
        with zipfile.ZipFile(self.out_dir, mode='w') as zf:
            for root, _dirnames, filenames in os.walk(self.export_dir):
                for filename in filenames:
                    file_path = os.path.join(root, filename)
                    # Store names relative to the export directory so that
                    # files in sub-directories keep their relative location
                    zf.write(file_path,
                             arcname=os.path.relpath(file_path,
                                                     self.export_dir))

        # Only reached when the archive was written and closed successfully
        shutil.rmtree(self.export_dir)