예제 #1
0
def datatypes():
    """Build a document with one entity exercising many attribute value types.

    Returns:
        A ``ProvDocument`` with the ``ex`` namespace registered and a single
        entity ``ex:e1`` whose attributes hold an int, a float, a large int,
        a bool, plain and accented strings, a ``datetime``, a ``Literal``
        and a multi-line string.
    """
    document = ProvDocument()
    document.add_namespace(Namespace('ex', 'http://example.org/'))

    # The leading spaces before "Line2" are inside the literal and are
    # therefore part of the attribute value.
    multiline_text = """Line1
    Line2
Line3"""

    international_label = Literal(
        "PROV Internationalized string",
        PROV["InternationalizedString"],
        "en",
    )

    entity_attributes = {
        'ex:int': 100,
        'ex:float': 100.123456,
        'ex:long': 123456789000,
        'ex:bool': True,
        'ex:str': 'Some string',
        'ex:unicode': u'Some unicode string with accents: Huỳnh Trung Đông',
        'ex:timedate': datetime.datetime(2012, 12, 12, 14, 7, 48),
        'ex:intstr': international_label,
        'ex:multi-line': multiline_text,
    }
    document.entity('ex:e1', entity_attributes)
    return document
예제 #2
0
def add_namespaces_to_bundle(prov_bundle, metadata):
    """
    Register every namespace listed in the metadata on the given bundle.

    The namespaces are read from ``metadata[METADATA_KEY_NAMESPACES]``. The
    value may be a dict mapping prefix to URI, or a JSON string that decodes
    to such a mapping. The prefix ``'default'`` sets the bundle's default
    namespace; every other prefix is added as a ``Namespace``.

    All entries are validated before the bundle is touched, so a failure
    never leaves the bundle partially updated.

    :param prov_bundle: Object providing ``add_namespace`` and
        ``set_default_namespace``.
    :param metadata: Mapping holding the namespaces under
        ``METADATA_KEY_NAMESPACES``.
    :raises SerializerException: If the key is missing, the value is neither
        a string nor a dict, or a prefix/URI is ``None``.
    :return: None
    """
    try:
        raw_namespaces = metadata[METADATA_KEY_NAMESPACES]
    except (KeyError, ValueError):
        # A missing mapping key raises KeyError; ValueError is kept for
        # callers whose metadata object signals absence that way.
        raise SerializerException(
            "No valid namespace provided, should be a string of a dict: {}".format(metadata))

    if isinstance(raw_namespaces, str):
        namespaces = json.loads(raw_namespaces)
    elif isinstance(raw_namespaces, dict):
        namespaces = raw_namespaces
    else:
        raise SerializerException(
            "Namespaces metadata should returned as json string or dict not as {}".format(type(raw_namespaces)))

    namespace_items = list(namespaces.items())
    for prefix, uri in namespace_items:
        if prefix is None or uri is None:
            raise SerializerException(
                "No valid namespace provided for the metadata: {}".format(metadata))

    for prefix, uri in namespace_items:
        if prefix == 'default':
            prov_bundle.set_default_namespace(uri)
        else:
            prov_bundle.add_namespace(Namespace(prefix, uri))
예제 #3
0
def datatypes():
    """Create a document whose only entity carries one attribute per value type.

    Returns:
        A ``ProvDocument`` with the ``ex`` namespace registered and an entity
        ``ex:e1`` holding numeric, boolean, string, ``datetime``, ``Literal``
        and multi-line string attributes.
    """
    example_ns = Namespace("ex", "http://example.org/")
    doc = ProvDocument()
    doc.add_namespace(example_ns)

    # Pairs are listed in the order the attributes are inserted.
    attribute_pairs = [
        ("ex:int", 100),
        ("ex:float", 100.123456),
        ("ex:long", 123456789000),
        ("ex:bool", True),
        ("ex:str", "Some string"),
        ("ex:unicode", "Some unicode string with accents: Huỳnh Trung Đông"),
        ("ex:timedate", datetime.datetime(2012, 12, 12, 14, 7, 48)),
        (
            "ex:intstr",
            Literal("PROV Internationalized string",
                    PROV["InternationalizedString"], "en"),
        ),
    ]
    entity_attributes = dict(attribute_pairs)

    # The four spaces before "Line2" belong to the string value.
    entity_attributes["ex:multi-line"] = """Line1
    Line2
Line3"""

    doc.entity("ex:e1", entity_attributes)
    return doc
예제 #4
0
def example_graph():
    """Build a small provenance bundle with two entities and one activity.

    The bundle uses the ``ex`` namespace URI as its default namespace and
    registers ``dcterms``. It contains entities ``e0`` and ``Foo``, activity
    ``a0``, a generation ``g0``, a usage ``u0`` and a derivation that refers
    to all of them.

    Returns:
        The populated ``ProvBundle``.
    """
    foaf_ns = Namespace("foaf", "http://xmlns.com/foaf/0.1/")
    ex_ns = Namespace("ex", "http://www.example.com/")
    dcterms_ns = Namespace("dcterms", "http://purl.org/dc/terms/")

    bundle = ProvBundle()
    bundle.set_default_namespace(ex_ns.get_uri())
    bundle.add_namespace(dcterms_ns)

    # First entity: attributes are keyed by qualified names.
    crime_file = bundle.entity(
        ex_ns["e0"],
        {
            PROV["type"]: "File",
            ex_ns["path"]: "/shared/crime.txt",
            ex_ns["creator"]: "Alice",
        },
    )

    # Second entity: identified by a qualified name from the foaf namespace
    # and carrying a typed dateTime literal.
    created_at = Literal("2011-11-16T16:06:00", XSD["dateTime"])
    source_file = bundle.entity(
        foaf_ns['Foo'],
        {
            PROV["type"]: ex_ns["File"],
            ex_ns["path"]: "/shared/crime.txt",
            dcterms_ns["creator"]: foaf_ns['Alice'],
            ex_ns["content"]: "",
            dcterms_ns["create"]: created_at,
        },
    )

    # Activity with a start time, no end time, and its attributes inline.
    started = datetime.datetime(2008, 7, 6, 5, 4, 3)
    create_activity = bundle.activity(
        ex_ns['a0'], started, None, {PROV["type"]: ex_ns["create-file"]})

    generation = bundle.wasGeneratedBy(
        crime_file, create_activity, None, "g0", {ex_ns["fct"]: "create"})

    usage_attributes = {
        ex_ns["fct"]: "load",
        ex_ns["typeexample"]: Literal("MyValue", ex_ns["MyType"]),
    }
    usage = bundle.used(create_activity, source_file, None, "u0",
                        usage_attributes)

    # No identifier is passed for the derivation itself.
    bundle.wasDerivedFrom(crime_file, source_file, create_activity,
                          generation, usage)

    return bundle
예제 #5
0
def collections():
    """Build a document with a collection ``ex:c1`` that has member ``ex:e1``.

    Returns:
        The ``ProvDocument`` holding the collection, the entity and the
        ``hadMember`` relation between them.
    """
    document = ProvDocument()
    example_ns = Namespace('ex', 'http://example.org/')

    # Arguments are evaluated left to right, so the collection is recorded
    # before the entity.
    document.hadMember(
        document.collection(example_ns['c1']),
        document.entity('ex:e1'),
    )
    return document
예제 #6
0
파일: testModel.py 프로젝트: luis-rr/prov
    def test_xsd_qnames(self):
        """Round-trip a document whose entity value is an ``XSDQName``."""
        example_ns = Namespace('ex', 'http://www.example.org')
        document = ProvDocument()
        document.add_namespace(example_ns)

        # The qualified name is wrapped so it is carried as a value.
        value_attributes = {'prov:value': XSDQName(example_ns['a_value'])}
        document.entity('ex:e1', value_attributes)

        self.assertPROVJSONRoundTripEquivalence(document)
예제 #7
0
def collections():
    """Return a document in which collection ``ex:c1`` has member ``ex:e1``."""
    doc = ProvDocument()
    ns = Namespace("ex", "http://example.org/")

    container = doc.collection(ns["c1"])
    member = doc.entity("ex:e1")
    doc.hadMember(container, member)
    return doc
예제 #8
0
def bundles1():
    """Build the ``bundles1`` example: a document containing two bundles.

    Mirrors the PROV-N file
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/bundles1.provn

    Returns:
        A ``ProvDocument`` that describes ``bob:bundle1`` and
        ``alice:bundle2`` at the top level and holds both bundles with their
        report entities.
    """
    document = ProvDocument()

    # NOTE(review): the PROV-N source declares
    # ``prefix ex <http://example.org/example/>`` while the URI registered
    # here is different - confirm which one is intended.
    document.add_namespace(Namespace("ex", "http://www.example.com/"))
    #   prefix alice  <http://example.org/alice/>
    document.add_namespace("alice", "http://example.org/alice/")
    #   prefix bob  <http://example.org/bob/>
    document.add_namespace("bob", "http://example.org/bob/")

    # Top-level description of each bundle, in PROV-N terms:
    #   entity(<bundle>, [prov:type='prov:Bundle'])
    #   wasGeneratedBy(<bundle>, -, <time>)
    #   agent(<owner>)
    #   wasAttributedTo(<bundle>, <owner>)
    bundle_descriptions = (
        ("bob:bundle1", "2012-05-24T10:30:00", "ex:Bob"),
        ("alice:bundle2", "2012-05-25T11:15:00", "ex:Alice"),
    )
    for bundle_id, generated_at, owner in bundle_descriptions:
        document.entity(bundle_id, {"prov:type": PROV["Bundle"]})
        document.wasGeneratedBy(bundle_id, time=generated_at)
        document.agent(owner)
        document.wasAttributedTo(bundle_id, owner)

    #   bundle bob:bundle1
    #     entity(ex:report1, [ prov:type="report", ex:version=1 ])
    #     wasGeneratedBy(ex:report1, -, 2012-05-24T10:00:01)
    #   endBundle
    bob_bundle = document.bundle("bob:bundle1")
    bob_bundle.entity("ex:report1", {"prov:type": "report", "ex:version": 1})
    bob_bundle.wasGeneratedBy("ex:report1", time="2012-05-24T10:00:01")

    #   bundle alice:bundle2
    #     entity(ex:report1)
    #     entity(ex:report2, [ prov:type="report", ex:version=2 ])
    #     wasGeneratedBy(ex:report2, -, 2012-05-25T11:00:01)
    #     wasDerivedFrom(ex:report2, ex:report1)
    #   endBundle
    alice_bundle = document.bundle("alice:bundle2")
    alice_bundle.entity("ex:report1")
    alice_bundle.entity("ex:report2",
                        {"prov:type": "report", "ex:version": 2})
    alice_bundle.wasGeneratedBy("ex:report2", time="2012-05-25T11:00:01")
    alice_bundle.wasDerivedFrom("ex:report2", "ex:report1")

    return document
예제 #9
0
def ctfToProv():
    """Convert the events of the module-level ``trace_collection`` to PROV.

    For the N-th event (counting from 0) the document receives:

    * an entity ``ex:event<N>`` whose attributes are the event's
      ``EVENT_FIELDS`` (keys prefixed with ``ex:``) plus ``ex:timestamp``,
      the event timestamp divided by 1000000000;
    * agents named after ``producer_id`` and ``controller_id``;
    * an activity ``ex:<activity><N>`` that generated the entity and is
      associated with the producer agent;
    * a ``used`` relation from the controller agent to the producer agent,
      added only the first time that pair is seen;
    * a ``wasAssociatedWith`` relation from the previous event's entity to
      this one (all events but the first).

    Returns:
        The populated ``ProvDocument``.
    """
    d1 = ProvDocument()
    # Scratch document: relations are created here only to obtain their
    # string form, which serves as the de-duplication key below.
    dummy = ProvDocument()
    ex = Namespace(
        'ex', 'http://example/'
    )  # namespaces do not need to be explicitly added to a document

    # A set gives constant-time membership checks for the keys seen so far.
    seen_relationships = set()
    previous_entity = None

    for counter, event in enumerate(trace_collection.events):
        dataset = {
            'ex:' + k: event[k]
            for k in event.field_list_with_scope(
                babeltrace.CTFScope.EVENT_FIELDS)
        }
        # presumably converts nanoseconds to seconds - TODO confirm the unit
        dataset['ex:' + 'timestamp'] = event['timestamp'] / 1000000000

        e1 = d1.entity(ex['event' + str(counter)], dataset)
        producer_agent = d1.agent('ex:' + event['producer_id'])
        controller_agent = d1.agent('ex:' + event['controller_id'])
        activity = d1.activity('ex:' + event['activity'] + str(counter))
        d1.wasGeneratedBy(e1, activity)

        # The association is recorded for every event (no de-duplication).
        d1.wasAssociatedWith(activity, producer_agent)

        # Link controller and producer only once per distinct pair.
        used_relationship = str(dummy.used(controller_agent, producer_agent))
        if used_relationship not in seen_relationships:
            d1.used(controller_agent, producer_agent)
            seen_relationships.add(used_relationship)

        # Chain this event to the previous one.
        # NOTE(review): both arguments are entities although the relation is
        # wasAssociatedWith - confirm this is the intended relation.
        if previous_entity is not None:
            d1.wasAssociatedWith(previous_entity, e1)
        previous_entity = e1

    return d1
예제 #10
0
파일: examples.py 프로젝트: luis-rr/prov
def long_literals():
    """Build a document using a very long namespace URI and a very long label.

    Returns:
        A ``ProvDocument`` with the ``ex`` namespace bound to the long URI
        and one entity ``ex:e1`` labelled with the long text.
    """
    long_uri = "http://Lorem.ipsum/dolor/sit/amet/consectetur/adipiscing/elit/Quisque/vel/sollicitudin/felis/nec/venenatis/massa/Aenean/lectus/arcu/sagittis/sit/amet/nisl/nec/varius/eleifend/sem/In/hac/habitasse/platea/dictumst/Aliquam/eget/fermentum/enim/Curabitur/auctor/elit/non/ipsum/interdum/at/orci/aliquam/"
    long_label = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec pellentesque luctus nulla vel ullamcorper. Donec sit amet ligula sit amet lorem pretium rhoncus vel vel lorem. Sed at consequat metus, eget eleifend massa. Fusce a facilisis turpis. Lorem volutpat.'

    document = ProvDocument()
    document.add_namespace(Namespace('ex', long_uri))
    document.entity('ex:e1', {'prov:label': long_label})
    return document
예제 #11
0
    def add_namespace(self, db_node, prov_bundle):
        """
        Register the namespaces stored on a database node with a bundle.

        Prefixes and URIs are read from the node properties
        ``DOC_PROPERTY_NAME_NAMESPACE_PREFIX`` and
        ``DOC_PROPERTY_NAME_NAMESPACE_URI`` and paired positionally. The
        prefix ``'default'`` sets the bundle's default namespace; every other
        prefix is added as a ``Namespace``.

        All pairs are validated before the bundle is touched, so a failure
        never leaves the bundle partially updated.

        :param db_node: Node whose ``properties`` mapping may contain the
            namespace prefix and URI sequences.
        :param prov_bundle: Object providing ``add_namespace`` and
            ``set_default_namespace``.
        :raises ProvDeserializerException: If a prefix or URI is ``None``.
        :return: None
        """
        try:
            prefixes = db_node.properties[DOC_PROPERTY_NAME_NAMESPACE_PREFIX]
            uris = db_node.properties[DOC_PROPERTY_NAME_NAMESPACE_URI]
        except KeyError:
            # The node carries no namespace properties: nothing to register.
            return

        namespace_pairs = list(zip(prefixes, uris))
        for prefix, uri in namespace_pairs:
            if prefix is None or uri is None:
                raise ProvDeserializerException(
                    "No valid namespace provided for the node: %s" % db_node)

        for prefix, uri in namespace_pairs:
            if prefix == 'default':
                prov_bundle.set_default_namespace(uri)
            else:
                prov_bundle.add_namespace(Namespace(prefix, uri))
예제 #12
0
def add_namespaces_to_bundle(prov_bundle, metadata):
    """
    Add all namespaces found in the metadata to the provided bundle.

    The namespaces are read from ``metadata[METADATA_KEY_NAMESPACES]``. The
    value may be a dict mapping prefix to URI, a JSON string decoding to such
    a mapping, or a list of such JSON strings (later entries override earlier
    ones for the same prefix). The prefix ``'default'`` sets the bundle's
    default namespace; every other prefix is added as a ``Namespace``.

    All entries are validated before the bundle is touched, so a failure
    never leaves the bundle partially updated.

    :param prov_bundle: Object providing ``add_namespace`` and
        ``set_default_namespace``.
    :param metadata: Mapping holding the namespaces under
        ``METADATA_KEY_NAMESPACES``.
    :raises SerializerException: If the key is missing, the value (or a list
        entry) has an unsupported type, or a prefix/URI is ``None``.
    :return: None
    """
    try:
        raw_namespaces = metadata[METADATA_KEY_NAMESPACES]
    except (KeyError, ValueError):
        # A missing mapping key raises KeyError; ValueError is kept for
        # callers whose metadata object signals absence that way.
        raise SerializerException(
            "No valid namespace provided, should be a string of a dict: {}".
            format(metadata))

    if isinstance(raw_namespaces, str):
        namespaces = json.loads(raw_namespaces)
    elif isinstance(raw_namespaces, dict):
        namespaces = raw_namespaces
    elif isinstance(raw_namespaces, list):
        namespaces = {}
        for entry in raw_namespaces:
            if not isinstance(entry, str):
                # Report the type of the offending entry, not of the list.
                raise SerializerException(
                    "Namespaces metadata should returned as json string dict or list of json strings not as {}"
                    .format(type(entry)))
            namespaces.update(json.loads(entry))
    else:
        raise SerializerException(
            "Namespaces metadata should returned as json string dict or list of json strings not as  {}"
            .format(type(raw_namespaces)))

    namespace_items = list(namespaces.items())
    for prefix, uri in namespace_items:
        if prefix is None or uri is None:
            raise SerializerException(
                "No valid namespace provided for the metadata: {}".format(
                    metadata))

    for prefix, uri in namespace_items:
        if prefix == 'default':
            prov_bundle.set_default_namespace(uri)
        else:
            prov_bundle.add_namespace(Namespace(prefix, uri))
예제 #13
0
    def update(self):
        """
        Refresh the stored environment snapshot from ``os.environ``.

        Nothing is changed when the hash of the current environment equals
        ``self.env_hash``. Otherwise ``env_dict`` and ``env_hash`` are
        replaced, ``user`` is read from the ``USER`` variable when it is
        present, and ``env_namespace`` is rebuilt from ``str(self)``.

        :return: None; the results are stored as attributes on ``self``.
        """
        snapshot = dict(os.environ)
        snapshot_hash = dict_to_sha256(snapshot)
        if snapshot_hash == self.env_hash:
            return

        self.env_dict = snapshot
        self.env_hash = snapshot_hash

        # this is only to prevent build errors
        # NOTE(review): when USER is missing, only env_dict gets the
        # placeholder; self.user is not assigned here - confirm intended.
        try:
            self.user = self.env_dict["USER"]
        except KeyError:  # no cover
            self.env_dict["USER"] = "******"  # no cover

        # Built last so that str(self) sees the refreshed attributes.
        self.env_namespace = Namespace("envs", str(self))
예제 #14
0
    def __init__(self, *args, **kwargs):
        """Constructor.

        Accepts the parent class's arguments. The ``namespaces`` keyword may
        be omitted (or ``None``), a dict mapping prefix to URI, or an
        iterable of ``Namespace`` objects; in every case the class-level
        ``NAMESPACES`` end up in the value passed on to the parent. The
        caller's own container is never modified.
        """

        # update namespaces
        supplied = kwargs.get('namespaces')
        if supplied is None:
            kwargs['namespaces'] = self.NAMESPACES
        else:
            if isinstance(supplied, dict):
                merged = [
                    Namespace(prefix, uri)
                    for prefix, uri in supplied.items()
                ]
            else:
                # Copy so that extending below does not mutate the caller's
                # list, and so that tuples and other iterables are accepted.
                merged = list(supplied)
            merged.extend(self.NAMESPACES)
            kwargs['namespaces'] = merged

        # track organizations to remove redundancy
        self.prov_es_orgs = {}

        super(ProvEsDocument, self).__init__(*args, **kwargs)
예제 #15
0
"""
Definition of constants.

@author: Camille Maumet <*****@*****.**>
@copyright: University of Warwick 2013-2014
"""

from prov.model import Namespace
from prov.model import PROV

# Namespaces from which the qualified names below are built.
NIDM = Namespace('nidm', "http://www.incf.org/ns/nidash/nidm#")
NIIRI = Namespace("niiri", "http://iri.nidash.org/")
CRYPTO = Namespace("crypto", "http://id.loc.gov/vocabulary/preservation/cryptographicHashFunctions#")
FSL = Namespace("fsl", "http://www.incf.org/ns/nidash/fsl#")
DCT = Namespace("dct", "http://purl.org/dc/terms/")

# Qualified name in the NIDM namespace.
GAUSSIAN_DISTRIBUTION = NIDM['GaussianDistribution']

# Qualified names for the error-correlation terms.
# NOTE(review): the constant name INDEPEDENT_CORR and the local name
# 'ArbitriralyCorrelatedError' both look misspelled. They are a public name
# and a runtime identifier respectively, so confirm against the NIDM
# vocabulary and all callers before changing either.
INDEPEDENT_CORR = NIDM['IndependentError']
SERIALLY_CORR = NIDM['SeriallyCorrelatedError']
COMPOUND_SYMMETRY_CORR = NIDM['CompoundSymmetricError']
ARBITRARILY_CORR = NIDM['ArbitriralyCorrelatedError']

# Set of the four correlation terms defined above.
CORRELATION_ENUM = {
    INDEPEDENT_CORR,
    SERIALLY_CORR,
    COMPOUND_SYMMETRY_CORR,
    ARBITRARILY_CORR
}

# NOTE(review): 'SpatiallyGlocal' does not match the constant's name
# (SPATIALLY_GLOBAL) - presumably a typo for 'SpatiallyGlobal'; confirm
# against the NIDM vocabulary.
SPATIALLY_GLOBAL = NIDM['SpatiallyGlocal']
예제 #16
0
def primer_example():
    """Build the PROV primer example as a ``ProvDocument``.

    Transcribes the PROV-N statements of
    https://github.com/lucmoreau/ProvToolbox/blob/master/prov-n/src/test/resources/prov/primer.pn
    Each group of calls below is preceded by the PROV-N it mirrors.

    Returns:
        The populated ``ProvDocument``.
    """
    document = ProvDocument()

    #    prefix ex <http://example/>
    #    prefix dcterms <http://purl.org/dc/terms/>
    #    prefix foaf <http://xmlns.com/foaf/0.1/>
    # namespaces do not need to be explicitly added to a document
    ex_ns = Namespace("ex", "http://example/")
    document.add_namespace("dcterms", "http://purl.org/dc/terms/")
    document.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    #    entity(ex:article, [dcterms:title="Crime rises in cities"])
    # first time the ex namespace was used, it is added to the document
    # automatically
    document.entity(ex_ns["article"],
                    {"dcterms:title": "Crime rises in cities"})

    #    entity(ex:articleV1) ... entity(ex:blogEntry), one per local name
    plain_entities = (
        "articleV1",
        "articleV2",
        "dataSet1",
        "dataSet2",
        "regionList",
        "composition",
        "chart1",
        "chart2",
        "blogEntry",
    )
    for local_name in plain_entities:
        document.entity(ex_ns[local_name])

    #    activity(ex:compile)
    #    activity(ex:compile2)
    #    activity(ex:compose)
    # since ex is registered, it can be used as a string prefix
    for activity_id in ("ex:compile", "ex:compile2", "ex:compose"):
        document.activity(activity_id)
    #    activity(ex:correct, 2012-03-31T09:21:00, 2012-04-01T15:21:00)
    # date time can be provided as strings
    correct_start = "2012-03-31T09:21:00"
    correct_end = "2012-04-01T15:21:00"
    document.activity("ex:correct", correct_start, correct_end)
    #    activity(ex:illustrate)
    document.activity("ex:illustrate")

    #    used(ex:compose, ex:dataSet1, -,   [ prov:role = "ex:dataToCompose"])
    #    used(ex:compose, ex:regionList, -, [ prov:role = "ex:regionsToAggregateBy"])
    compose_inputs = (
        ("ex:dataSet1", "ex:dataToCompose"),
        ("ex:regionList", "ex:regionsToAggregateBy"),
    )
    for used_entity, role in compose_inputs:
        document.used("ex:compose",
                      used_entity,
                      other_attributes={"prov:role": role})
    #    wasGeneratedBy(ex:composition, ex:compose, -)
    document.wasGeneratedBy("ex:composition", "ex:compose")

    #    used(ex:illustrate, ex:composition, -)
    document.used("ex:illustrate", "ex:composition")
    #    wasGeneratedBy(ex:chart1, ex:illustrate, -)
    document.wasGeneratedBy("ex:chart1", "ex:illustrate")

    #    wasGeneratedBy(ex:chart1, ex:compile,  2012-03-02T10:30:00)
    document.wasGeneratedBy("ex:chart1", "ex:compile", "2012-03-02T10:30:00")
    #    wasGeneratedBy(ex:chart2, ex:compile2, 2012-04-01T15:21:00)
    # NOTE(review): no call corresponds to the chart2 statement above -
    # confirm whether the omission is intentional.

    #    agent(ex:derek, [ prov:type="prov:Person", foaf:givenName = "Derek",
    #           foaf:mbox= "<mailto:[email protected]>"])
    derek_attributes = {
        "prov:type": PROV["Person"],
        "foaf:givenName": "Derek",
        "foaf:mbox": "<mailto:[email protected]>",
    }
    document.agent("ex:derek", derek_attributes)
    #    wasAssociatedWith(ex:compose, ex:derek, -)
    #    wasAssociatedWith(ex:illustrate, ex:derek, -)
    for derek_activity in ("ex:compose", "ex:illustrate"):
        document.wasAssociatedWith(derek_activity, "ex:derek")

    #    agent(ex:chartgen, [ prov:type="prov:Organization",
    #           foaf:name = "Chart Generators Inc"])
    chartgen_attributes = {
        "prov:type": PROV["Organization"],
        "foaf:name": "Chart Generators Inc"
    }
    document.agent("ex:chartgen", chartgen_attributes)
    #    actedOnBehalfOf(ex:derek, ex:chartgen, ex:compose)
    document.actedOnBehalfOf("ex:derek", "ex:chartgen", "ex:compose")
    #    wasAttributedTo(ex:chart1, ex:derek)
    document.wasAttributedTo("ex:chart1", "ex:derek")

    #    wasGeneratedBy(ex:dataSet2, ex:correct, -)
    document.wasGeneratedBy("ex:dataSet2", "ex:correct")
    #    used(ex:correct, ex:dataSet1, -)
    document.used("ex:correct", "ex:dataSet1")
    #    wasDerivedFrom(ex:dataSet2, ex:dataSet1, [prov:type='prov:Revision'])
    revision = {"prov:type": PROV["Revision"]}
    document.wasDerivedFrom("ex:dataSet2", "ex:dataSet1",
                            other_attributes=revision)
    #    wasDerivedFrom(ex:chart2, ex:dataSet2)
    document.wasDerivedFrom("ex:chart2", "ex:dataSet2")

    #    wasDerivedFrom(ex:blogEntry, ex:article, [prov:type='prov:Quotation'])
    quotation = {"prov:type": PROV["Quotation"]}
    document.wasDerivedFrom("ex:blogEntry", "ex:article",
                            other_attributes=quotation)

    #    specializationOf(ex:articleV1, ex:article)
    #    wasDerivedFrom(ex:articleV1, ex:dataSet1)
    #    specializationOf(ex:articleV2, ex:article)
    #    wasDerivedFrom(ex:articleV2, ex:dataSet2)
    article_versions = (
        ("ex:articleV1", "ex:dataSet1"),
        ("ex:articleV2", "ex:dataSet2"),
    )
    for version_id, source_dataset in article_versions:
        document.specializationOf(version_id, "ex:article")
        document.wasDerivedFrom(version_id, source_dataset)

    #    alternateOf(ex:articleV2, ex:articleV1)
    document.alternateOf("ex:articleV2", "ex:articleV1")

    # endDocument
    return document
예제 #17
0
"""
Definition of constants.

@author: Camille Maumet <*****@*****.**>
@copyright: University of Warwick 2013-2014
"""

from prov.model import PROV, Namespace, NamespaceManager

# Namespaces (prefix, URI) used throughout the package.
NIDM = Namespace('nidm', "http://purl.org/nidash/nidm#")
NIIRI = Namespace("niiri", "http://iri.nidash.org/")
CRYPTO = Namespace(
    "crypto",
    "http://id.loc.gov/vocabulary/preservation/cryptographicHashFunctions#")
FSL = Namespace("fsl", "http://purl.org/nidash/fsl#")
SPM = Namespace("spm", "http://purl.org/nidash/spm#")
AFNI = Namespace("afni", "http://purl.org/nidash/afni#")
DCT = Namespace("dct", "http://purl.org/dc/terms/")
OBO = Namespace("obo", "http://purl.obolibrary.org/obo/")
DCTYPE = Namespace("dctype", "http://purl.org/dc/dcmitype/")
NLX_OLD = Namespace("nlx_old", "http://neurolex.org/wiki/")
DC = Namespace("dc", "http://purl.org/dc/elements/1.1/")
NFO = Namespace(
    "nfo", "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#")
SCR = Namespace("scr", "http://scicrunch.org/resolver/")
NIF = Namespace("nif", "http://uri.neuinfo.org/nif/nifstd/")

# Namespaces handed to the shared namespace manager below.
# NOTE(review): NLX_OLD is defined above but not listed here - confirm
# whether the omission is deliberate.
namespaces = (PROV, NIDM, NIIRI, CRYPTO, FSL, SPM, AFNI, DCT, OBO,
              DCTYPE, DC, NFO, SCR, NIF)

# Module-level manager built from the tuple above.
namespace_manager = NamespaceManager(namespaces)
예제 #18
0
def w3c_publication_2():
    """Build the ``w3c-publication2`` example as a ``ProvDocument``.

    Transcribes the PROV-ASN bundle at
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn
    Each call below is preceded by the statement it mirrors.

    Returns:
        The populated ``ProvDocument``.
    """
    document = ProvDocument()

    # prefix ex <http://example.org/>
    # prefix rec <http://example.org/record>
    # prefix w3 <http://www.w3.org/TR/2011/>
    # prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>
    ex_ns = Namespace("ex", "http://example.org/")
    rec_ns = Namespace("rec", "http://example.org/record")
    w3_ns = Namespace("w3", "http://www.w3.org/TR/2011/")
    hg_ns = Namespace(
        "hg",
        "http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/",
    )

    # entity(hg:Overview.html, [ prov:type="file in hg" ])
    document.entity(hg_ns["Overview.html"], {"prov:type": "file in hg"})
    # entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])
    document.entity(w3_ns["WD-prov-dm-20111215"], {"prov:type": "html4"})

    # activity(ex:rcp,-,-,[prov:type="copy directory"])
    document.activity(ex_ns["rcp"], None, None,
                      {"prov:type": "copy directory"})

    # wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)
    document.wasGeneratedBy("w3:WD-prov-dm-20111215", "ex:rcp",
                            identifier=rec_ns["g"])

    # entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])
    publication_request_type = Identifier(
        "http://www.w3.org/2005/08/01-transitions.html#pubreq")
    document.entity("ex:req3", {"prov:type": publication_request_type})

    # used(rec:u; ex:rcp,hg:Overview.html,-)
    document.used("ex:rcp", "hg:Overview.html", identifier="rec:u")
    # used(ex:rcp, ex:req3, -)
    document.used("ex:rcp", "ex:req3")

    # wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)
    document.wasDerivedFrom("w3:WD-prov-dm-20111215", "hg:Overview.html",
                            "ex:rcp", "rec:g", "rec:u")

    # agent(ex:webmaster, [ prov:type='prov:Person' ])
    # NOTE(review): the statement above uses the qualified name prov:Person
    # while the code passes the plain string "Person" - confirm intended.
    document.agent("ex:webmaster", {"prov:type": "Person"})

    # wasAssociatedWith(ex:rcp, ex:webmaster, -)
    document.wasAssociatedWith("ex:rcp", "ex:webmaster")

    return document
예제 #19
0
def ctfToProv():
    """Convert the events of the module-level ``trace_collection`` to PROV.

    First pass: every event becomes an entity in ``d1`` (attributes are the
    event's ``EVENT_FIELDS`` prefixed with ``ex:`` plus ``ex:name``), and the
    entity is stored together with the event's ``activity`` value in an
    ``entityActivity`` object, grouped by the event's ``producer_id``.

    Second pass: for every producer id an agent is created, a ``used``
    relation from the controller agent is added once per distinct pair, and
    the grouped events are chained and linked to their activity and agent.

    Returns:
        The populated ``ProvDocument``.
    """
    d1 = ProvDocument()
    # Scratch document used only to render relations as strings for the
    # duplicate check in the second pass.
    dummy = ProvDocument()
    ex = Namespace(
        'ex', 'http://example/'
    )  # namespaces do not need to be explicitly added to a document
    #data = event_field(os.path.join(trace_path,'../config.yaml'))
    # NOTE(review): counter is never incremented (the increment further down
    # is commented out), so every entity is created with the local name
    # 'event0' - confirm this is intended.
    counter = 0
    #counter_1 = 0
    relationships = []
    entityActivityList = []
    # activities = []
    # producer id -> list of entityActivity objects, in event order
    can_events = defaultdict(list)
    for event in trace_collection.events:
        dataset = {
            'ex:' + k: event[k]
            for k in event.field_list_with_scope(
                babeltrace.CTFScope.EVENT_FIELDS)
        }
        #dataset.update({'ex:'+'timestamp':(event['timestamp']/1000000000)})
        dataset.update({'ex:' + 'name': event.name})

        # #calculates PGN

        # pf = str(bin(int(dataset['node_id'], 16)))[5:13]

        # if int(pf) > 240:
        #     pgn = int(str(bin(int(dataset['node_id'], 16)))[3:21], 2)
        # else:
        #     pgn = int(str(bin(int(dataset['node_id'], 16)))[3:13], 2)

        # #Gets source address.
        # sa = str(bin(int(dataset['node_id'], 16)))[-8:]  #gets last byte.

        sa = event['producer_id']

        activity = event['activity']

        e1 = d1.entity(ex['event' + str(counter)], dataset)

        #create class object to store entity and activity data field.

        entity_activity = entityActivity()

        entity_activity.addEntityActivity(e1, activity)
        #entityActivityList.append(e1)
        #can_events.setdefault(str(sa),[]).append(e1)

        can_events[sa].append(entity_activity)
        #node_id = d1.agent('ex:'+event['node_id'])
        # NOTE(review): this name is read again in the second loop, where it
        # holds the value from the last event only and is undefined when
        # there are no events - confirm intended.
        controller_agent = d1.agent('ex:' + event['controller_id'])

        # activity = d1.activity('ex:'+event['activity']+str(counter))
        # activities.append(activity)

        #d1.wasGeneratedBy(e1, activity)
        # strings used to detect if the relationship already exists in the d1 document
        # association_relationship = str(dummy.wasAssociatedWith(activity, sa))

        # used_relationship = str(dummy.used(network_id, sa))

        #add activity to sensor agent
    # d1.wasAssociatedWith(activity,sensor_agent)
    #check if the association already esists
    # if association_relationship not in relationships:
    #     d1.wasAssociatedWith(activity,sensor_agent)
    #     relationships.append(association_relationship)
    # if used_relationship not in relationships:
    #     d1.used(network_id, sa)
    #     relationships.append(used_relationship)
    #counter+=1
    #counter_1 +=1
    # for index in range(len(entityActivityList)-1):
    #     d1.wasAssociatedWith(entityActivityList[index], entityActivityList[index + 1])

    # for index in range(len(entityActivityList)):
    #     d1.wasGeneratedBy(entityActivityList[index], activities[index])
    #     d1.wasAssociatedWith(activities[index],sa)

    # Second pass: one agent per producer id, then link its events.
    for key in can_events.keys():

        producer_agent = d1.agent('ex:' + str(key))
        used_relationship = str(dummy.used(controller_agent, producer_agent))
        #association_relationship = str(dummy.wasAssociatedWith(activity, sa))

        # Add the controller -> producer relation only once per distinct pair.
        if used_relationship not in relationships:
            d1.used(controller_agent, producer_agent)
            relationships.append(used_relationship)

        entityActivityList = can_events[key]

        # NOTE(review): the range stops one short of the end, so the last
        # event of each producer gets no wasGeneratedBy / wasAssociatedWith
        # record - confirm intended.
        for index in range(len(entityActivityList) - 1):
            # Chain each event's entity to the next one of the same producer.
            d1.wasAssociatedWith(entityActivityList[index].getEntity(),
                                 entityActivityList[index + 1].getEntity())
            # NOTE(review): the wrapper object itself is passed here, not
            # .getEntity() as in the call above - confirm intended.
            d1.wasGeneratedBy(entityActivityList[index],
                              entityActivityList[index].getActivity())
            d1.wasAssociatedWith(entityActivityList[index].getActivity(),
                                 producer_agent)

    return d1
예제 #20
0
def _bids_subject_label(participant_id):
    """Return the subject label carried by a BIDS ``participant_id`` value.

    BIDS datasets are ambiguous here: the value is sometimes ``sub-XXXX`` and
    sometimes just ``XXXX``.  When the value contains a hyphen the second
    hyphen-separated field is returned, otherwise the value itself.
    """
    parts = participant_id.split("-")
    return parts[1] if len(parts) > 1 else parts[0]


def bidsmri2project(directory, args):
    """Convert the BIDS dataset rooted at ``directory`` into a NIDM project.

    Reads ``dataset_description.json``, ``participants.tsv`` (with its optional
    ``participants.json`` sidecar), the imaging sessions reported by the BIDS
    layout and any ``phenotype/*.tsv`` files, and records them on a new
    ``Project``.

    :param directory: path of the BIDS dataset root.  It is concatenated into
        ``file:/`` locations, so a string is expected.
    :param args: parsed command-line options; ``args.json_map`` and
        ``args.no_concepts`` are read.
    :return: tuple ``(project, cde)`` -- the populated project and the graph of
        data elements (replaced by the one returned from
        ``map_variables_to_terms`` when ``participants.tsv`` exists).

    Exits the interpreter (``exit("-1")``) when ``directory`` is not a
    directory or ``dataset_description.json`` cannot be opened.
    """
    # initialize empty cde graph...it may get replaced if we're doing variable to term mapping or not
    cde = Graph()

    # Parse dataset_description.json file in BIDS directory
    if os.path.isdir(directory):
        try:
            with open(os.path.join(directory, 'dataset_description.json')) as data_file:
                dataset = json.load(data_file)
        except OSError:
            logging.critical("Cannot find dataset_description.json file which is required in the BIDS spec")
            exit("-1")
    else:
        logging.critical("Error: BIDS directory %s does not exist!", directory)
        exit("-1")

    # create project / nidm-exp doc
    project = Project()

    # if there are git annex sources then add them
    num_sources = addGitAnnexSources(obj=project.get_uuid(), bids_root=directory)
    # else just add the local path to the dataset
    if num_sources == 0:
        project.add_attributes({Constants.PROV['Location']: "file:/" + directory})

    # add various attributes if they exist in BIDS dataset
    for key in dataset:
        # if key from dataset_description file is mapped to term in BIDS_Constants.py then add to NIDM object
        if key in BIDS_Constants.dataset_description:
            if type(dataset[key]) is list:
                project.add_attributes({BIDS_Constants.dataset_description[key]: "".join(dataset[key])})
            else:
                project.add_attributes({BIDS_Constants.dataset_description[key]: dataset[key]})

    # get BIDS layout
    bids_layout = BIDSLayout(directory)

    # Both dictionaries are keyed by subject label and are used later to link
    # phenotype acquisitions to the same session / person as the demographics.
    session = {}
    participant = {}

    participants_tsv = os.path.join(directory, 'participants.tsv')
    participants_json = os.path.join(directory, 'participants.json')

    # Parse participants.tsv file in BIDS directory and create study and acquisition objects
    if os.path.isfile(participants_tsv):
        with open(participants_tsv) as csvfile:
            participants_data = csv.DictReader(csvfile, delimiter='\t')

            # Columns that are not already mapped as BIDS constants are handed
            # to the variable-term mapping function.
            mapping_list = [field for field in participants_data.fieldnames
                            if field not in BIDS_Constants.participants]

            # Every mapping call shares these arguments; only json_file and
            # associate_concepts vary with the command-line options.
            map_kwargs = dict(directory=directory,
                              assessment_name='participants.tsv',
                              df=DataFrame(columns=mapping_list),
                              output_file=participants_json,
                              bids=True)
            if args.json_map == False:  # noqa: E712 -- keep the original equality test
                # No mapping file supplied: fall back to an existing
                # participants.json, otherwise run without a json mapping file.
                if os.path.isfile(participants_json):
                    map_kwargs['json_file'] = participants_json
            else:
                map_kwargs['json_file'] = args.json_map
            if args.no_concepts:
                map_kwargs['associate_concepts'] = False
            column_to_terms, cde = map_variables_to_terms(**map_kwargs)

            # Stays None until a participants.json sidecar entity has been
            # created; once set it is linked to every later assessment entity.
            json_sidecar = None

            for row in participants_data:
                # create session object for subject to be used for participant metadata and image data
                subjid = _bids_subject_label(row['participant_id'])
                logging.info(subjid)
                session[subjid] = Session(project)

                # add acquisition object
                acq = AssessmentAcquisition(session=session[subjid])

                acq_entity = AssessmentObject(acquisition=acq)
                participant[subjid] = {}
                participant[subjid]['person'] = acq.add_person(attributes=({Constants.NIDM_SUBJECTID: row['participant_id']}))

                # add nfo:filename entry to assessment entity to reflect provenance of where this data came from
                acq_entity.add_attributes({Constants.NIDM_FILENAME: getRelPathToBIDS(participants_tsv, directory)})

                # add qualified association of participant with acquisition activity
                acq.add_qualified_association(person=participant[subjid]['person'], role=Constants.NIDM_PARTICIPANT)

                # if there are git annex sources for participants.tsv file then add them
                num_sources = addGitAnnexSources(obj=acq_entity.get_uuid(), bids_root=directory)
                # else just add the local path to the dataset
                if num_sources == 0:
                    acq_entity.add_attributes({Constants.PROV['Location']: "file:/" + participants_tsv})

                # if there's a JSON sidecar file then create an entity and associate it with all the assessment entities
                if os.path.isfile(participants_json):
                    json_sidecar = AssessmentObject(acquisition=acq)
                    json_sidecar.add_attributes({PROV_TYPE: QualifiedName(Namespace("bids", Constants.BIDS), "sidecar_file"),
                                                 Constants.NIDM_FILENAME: getRelPathToBIDS(participants_json, directory)})

                    # if there are git annex sources for participants.json file then add them
                    num_sources = addGitAnnexSources(obj=json_sidecar.get_uuid(), filepath=participants_json, bids_root=directory)
                    # else just add the local path to the dataset
                    if num_sources == 0:
                        json_sidecar.add_attributes({Constants.PROV['Location']: "file:/" + participants_json})

                # if a json_sidecar entity exists, associate the assessment entity with it
                if json_sidecar is not None:
                    acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]: json_sidecar})

                for key, value in row.items():
                    if not value:
                        continue
                    # variables with a fixed term mapping in BIDS_Constants.py
                    if key in BIDS_Constants.participants:
                        # WIP
                        # Here we are adding to CDE graph data elements for BIDS Constants that remain fixed for each BIDS-compliant dataset
                        # (participant_id itself was already handled above via the person / qualified association)
                        if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID):
                            # ID for BIDS variables is always the same bids:[bids variable]
                            cde_id = Constants.BIDS[key]
                            # add the data element to the CDE graph
                            cde.add((cde_id, RDF.type, Constants.NIDM['DataElement']))
                            cde.add((cde_id, RDF.type, Constants.PROV['Entity']))
                            # add some basic information about this data element
                            # NOTE(review): the description/comment literals below mention the
                            # participant identifier although this branch runs for every
                            # non-identifier BIDS variable -- confirm the intended text.
                            cde.add((cde_id, Constants.RDFS['label'], Literal(BIDS_Constants.participants[key].localpart)))
                            cde.add((cde_id, Constants.NIDM['isAbout'], URIRef(BIDS_Constants.participants[key].uri)))
                            cde.add((cde_id, Constants.NIDM['source_variable'], Literal(key)))
                            cde.add((cde_id, Constants.NIDM['description'], Literal("participant/subject identifier")))
                            cde.add((cde_id, Constants.RDFS['comment'], Literal("BIDS participants_id variable fixed in specification")))
                            cde.add((cde_id, Constants.RDFS['valueType'], URIRef(Constants.XSD["string"])))

                            acq_entity.add_attributes({cde_id: Literal(value)})

                    # every other column goes through the variable -> term mapping support (also used in CSV2NIDM.py)
                    else:
                        # WIP: trying to add new support for CDEs...
                        add_attributes_with_cde(prov_object=acq_entity, cde=cde, row_variable=key, value=value)

    # create acquisition objects for each scan for each subject

    # loop through all subjects in dataset
    for subject_id in bids_layout.get_subjects():
        logging.info("Converting subject: %s", subject_id)
        # skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue

        # check if there are a session numbers.  If so, store it in the session activity and create a new
        # sessions for these imaging acquisitions.  Because we don't know which imaging session the root
        # participants.tsv file data may be associated with we simply link the imaging acquisitions to different
        # sessions (i.e. the participants.tsv file goes into an AssessmentAcquisition and linked to a unique
        # sessions and the imaging acquisitions go into MRAcquisitions and has a unique session)
        imaging_sessions = bids_layout.get_sessions(subject=subject_id)

        # loop through each session if there is a sessions directory
        if len(imaging_sessions) > 0:
            for img_session in imaging_sessions:
                # create a new session
                ses = Session(project)
                # add session number as metadata
                ses.add_attributes({Constants.BIDS['session_number']: img_session})
                addimagingsessions(bids_layout=bids_layout, subject_id=subject_id, session=ses, participant=participant, directory=directory, img_session=img_session)
        # NOTE(review): the original comment here read "else we have no ses-*
        # directories", but this call is not inside an else branch and runs for
        # every subject, including those handled by the loop above.  Left
        # unchanged -- confirm whether an `else:` was intended.
        addimagingsessions(bids_layout=bids_layout, subject_id=subject_id, session=Session(project), participant=participant, directory=directory)

        # Added temporarily to support phenotype files
        # for each *.tsv / *.json file pair in the phenotypes directory
        # WIP: ADD VARIABLE -> TERM MAPPING HERE
        for tsv_file in glob.glob(os.path.join(directory, "phenotype", "*.tsv")):
            # for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to
            # the associated JSON data dictionary file
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    # Accept both "sub-XXXX" and bare "XXXX" identifiers, exactly
                    # as the participants.tsv handling above does.
                    if _bids_subject_label(row['participant_id']) != subject_id:
                        continue

                    # add acquisition object
                    # NOTE(review): session / participant are only filled from
                    # participants.tsv, so a subject missing there raises KeyError.
                    acq = AssessmentAcquisition(session=session[subject_id])
                    # add qualified association with person
                    acq.add_qualified_association(person=participant[subject_id]['person'], role=Constants.NIDM_PARTICIPANT)

                    acq_entity = AssessmentObject(acquisition=acq)

                    for key, value in row.items():
                        if not value:
                            continue
                        # we're using participant_id in NIDM in agent so don't add to assessment as a triple.
                        # BIDS phenotype files seem to have an index column with no column header variable name so skip those
                        if ((not key == "participant_id") and (key != "")):
                            # for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs..
                            acq_entity.add_attributes({Constants.BIDS[key]: value})

                    # link TSV file
                    acq_entity.add_attributes({Constants.NIDM_FILENAME: getRelPathToBIDS(tsv_file, directory)})

                    # if there are git annex sources for the phenotype TSV entity then add them
                    num_sources = addGitAnnexSources(obj=acq_entity.get_uuid(), bids_root=directory)
                    # else just add the local path to the dataset
                    if num_sources == 0:
                        acq_entity.add_attributes({Constants.PROV['Location']: "file:/" + tsv_file})

                    # link associated JSON file if it exists
                    data_dict = os.path.join(directory, "phenotype", os.path.splitext(os.path.basename(tsv_file))[0] + ".json")
                    if os.path.isfile(data_dict):
                        # if file exists, create a new entity and associate it with the appropriate activity  and a used relationship
                        # with the TSV-related entity
                        json_entity = AssessmentObject(acquisition=acq)
                        json_entity.add_attributes({PROV_TYPE: Constants.BIDS["sidecar_file"],
                                                    Constants.NIDM_FILENAME: getRelPathToBIDS(data_dict, directory)})

                        # if there are git annex sources for the JSON data dictionary then add them
                        num_sources = addGitAnnexSources(obj=json_entity.get_uuid(), filepath=data_dict, bids_root=directory)
                        # else just add the local path to the dataset
                        if num_sources == 0:
                            json_entity.add_attributes({Constants.PROV['Location']: "file:/" + data_dict})

                        # connect json_entity with acq_entity
                        acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]: json_entity.get_uuid()})

    return project, cde
    Namespace,
)
import numpy as np

from .common import calculate_provenance_network_metrics

# Names of the Pokemon Go data columns.
# NOTE(review): presumably the per-graph feature columns produced further down
# in this module -- their use is not visible in this excerpt; confirm.
POKEMON_GO_DATA_COLUMNS = [
    "n_balls_collected",
    "n_pokemons_captured",
    "n_pokemons_disposed",
    "strength_captured_avg",
    "strength_disposed_avg",
]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# PROV namespace with prefix "pgo" for the Pokemon Go vocabulary.
NS_PGO = Namespace("pgo", "http://sociam.org/pokemongo#")
# Qualified name "strength" looked up in the pgo namespace.
PGO_strength = NS_PGO["strength"]


def create_graph_index(dataset_path, output_path):
    """Load the ``graphs.csv`` index found in ``dataset_path``.

    Logs an error and terminates the interpreter with status 1 when the index
    file does not exist.

    :param dataset_path: folder expected to contain ``graphs.csv``.
    :param output_path: not used in the visible part of this function.
        NOTE(review): the excerpt ends right after the CSV is read, so the
        role of this argument and the return value cannot be confirmed here.
    """
    logger.debug("Working in folder: %s", dataset_path)
    # Normalise to a Path so the "/" operator can be used below.
    dataset_path = Path(dataset_path)

    graph_index_filepath = dataset_path / "graphs.csv"
    if not graph_index_filepath.exists():
        logger.error("Graphs index file is not found: %s", graph_index_filepath)
        exit(1)

    logger.debug("Reading graphs index...")
    graphs = pd.read_csv(graph_index_filepath)
예제 #22
0
파일: provjson.py 프로젝트: vreuter/prov
def decode_json_container(jc, bundle):
    """Populate ``bundle`` with the namespaces and records described by ``jc``.

    ``jc`` is a PROV-JSON container mapping.  Its optional ``'prefix'`` entry
    is registered on ``bundle`` and then removed from ``jc`` (the input is
    mutated); every remaining key is looked up as a record type whose value
    maps record identifiers to one attribute mapping or a list of them.

    Raises ``ProvJSONException`` when a formal PROV attribute carries more
    than one value, except for the entity attribute of a membership record,
    which is expanded into one membership relation per entity.
    """
    if 'prefix' in jc:
        for ns_prefix, ns_uri in jc['prefix'].items():
            if ns_prefix == 'default':
                bundle.set_default_namespace(ns_uri)
            else:
                bundle.add_namespace(Namespace(ns_prefix, ns_uri))
        del jc['prefix']

    for type_key in jc:
        rec_type = PROV_RECORD_IDS_MAP[type_key]
        for rec_id, payload in jc[type_key].items():
            # A mapping is a single record; anything else is expected to be
            # a list of such mappings.
            records = [payload] if hasattr(payload, 'items') else payload

            for record in records:
                formal_attrs = {}
                extra_attrs = []
                # Additional entities of a multi-entity membership record.
                pending_members = None

                for attr_name, raw in record.items():
                    if attr_name in PROV_ATTRIBUTES_ID_MAP:
                        attr = PROV_ATTRIBUTES_ID_MAP[attr_name]
                    else:
                        attr = valid_qualified_name(bundle, attr_name)

                    if attr not in PROV_ATTRIBUTES:
                        # Non-formal attributes may repeat: keep one pair
                        # per decoded value.
                        if isinstance(raw, list):
                            extra_attrs.extend([
                                (attr, decode_json_representation(item, bundle))
                                for item in raw
                            ])
                        else:
                            extra_attrs.append(
                                (attr, decode_json_representation(raw, bundle)))
                        continue

                    # Formal attributes accept exactly one value.
                    if not isinstance(raw, list):
                        single = raw
                    elif len(raw) <= 1:
                        single = raw[0]
                    elif rec_type == PROV_MEMBERSHIP and attr == PROV_ATTR_ENTITY:
                        # HACK: a membership relation listing several
                        # entities becomes several relations; the first
                        # entity is handled normally, the rest afterwards.
                        pending_members = raw[1:]
                        single = raw[0]
                    else:
                        error_msg = (
                            'The prov package does not support PROV'
                            ' attributes having multiple values.')
                        logger.error(error_msg)
                        raise ProvJSONException(error_msg)

                    if attr in PROV_ATTRIBUTE_QNAMES:
                        formal_attrs[attr] = valid_qualified_name(bundle, single)
                    else:
                        formal_attrs[attr] = parse_xsd_datetime(single)

                bundle.new_record(rec_type, rec_id, formal_attrs, extra_attrs)

                # HACK: create the extra (unidentified) membership relations.
                if pending_members:
                    collection = formal_attrs[PROV_ATTR_COLLECTION]
                    for member in pending_members:
                        bundle.membership(collection,
                                          valid_qualified_name(bundle, member))
def example():
    """Assemble a sample PROV document for an ADAMA microservice response.

    The document describes the people and organisations involved, the ADAMA
    platform and one microservice, two data sources and a query activity that
    produced the response.  It is printed as PROV-N, printed again through
    ``serialize()`` with default arguments, and rendered to ``Sources.png``.
    """
    doc = ProvDocument()
    # Local namespace; it does not exist elsewhere, so it is created here.
    ap = Namespace('aip', 'https://araport.org/provenance/')
    # Dublin Core and FOAF vocabularies.
    doc.add_namespace("dcterms", "http://purl.org/dc/terms/")
    doc.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    now = datetime.datetime.now

    def add_person(local_name, given_name, mbox=None):
        # Register a prov:Person agent, with a mailbox when one is known.
        attrs = {'prov:type': PROV["Person"], 'foaf:givenName': given_name}
        if mbox is not None:
            attrs['foaf:mbox'] = mbox
        return doc.agent(ap[local_name], attrs)

    def add_organization(local_name, given_name, identifier=None):
        # Register a prov:Organization agent, with an identifier when known.
        attrs = {'prov:type': PROV["Organization"], 'foaf:givenName': given_name}
        if identifier is not None:
            attrs['dcterms:identifier'] = identifier
        return doc.agent(ap[local_name], attrs)

    # --- Sponsors and contributors -------------------------------------
    # Hard coded for now; could come from a profiles service, at the cost
    # of a dependency on Agave.
    vaughn = add_person('matthew_vaughn', "Matthew Vaughn",
                        "<mailto:[email protected]>")
    moreira = add_person('walter_moreira', "Walter Moreira",
                         "<mailto:[email protected]>")
    texas = add_organization('university_of_texas',
                             "University of Texas at Austin")

    # Delegation to the host university.  This may be hard for other users
    # because their host institution is not always captured.
    doc.actedOnBehalfOf(moreira, texas)
    doc.actedOnBehalfOf(vaughn, texas)

    # --- ADAMA platform ------------------------------------------------
    # Title, description and language are hard-coded; dcterms:updated
    # stands for the date ADAMA was last updated.
    platform_attrs = {
        'dcterms:title': "ADAMA",
        'dcterms:description': "Araport Data and Microservices API",
        'dcterms:language': "en-US",
        'dcterms:identifier': "https://api.araport.org/community/v0.3/",
        'dcterms:updated': "2015-04-17T09:44:56",
    }
    platform = doc.agent(ap['adama_platform'], platform_attrs)
    doc.wasGeneratedBy(platform, moreira)

    # --- ADAMA microservice --------------------------------------------
    # Title and description come from the service metadata, identifier is
    # the deployment URI and source a (dummy) public git repository.  The
    # agent name combines namespace, service name and version so that it
    # is unique per microservice.
    microservice_name = 'mwvaughn/bar_annotation_v1.0.0'
    service_attrs = {
        'dcterms:title': "BAR Annotation Service",
        'dcterms:description': "Returns annotation from locus ID",
        'dcterms:language': "en-US",
        'dcterms:identifier':
            "https://api.araport.org/community/v0.3/mwvaughn/bar_annotation_v1.0.0",
        'dcterms:source':
            "https://github.com/Arabidopsis-Information-Portal/prov-enabled-api-sample",
    }
    microservice = doc.agent(ap[microservice_name], service_attrs)

    # Ideally the generation time is when the service was updated, not now.
    doc.wasGeneratedBy(microservice, vaughn, now())
    doc.used(microservice, platform, now())

    # --- Source: BAR ---------------------------------------------------
    provart = add_person('nicholas_provart', "Nicholas Provart",
                         "*****@*****.**")
    toronto = add_organization('university_of_toronto',
                               "University of Toronto",
                               "http://www.utoronto.ca/")
    doc.actedOnBehalfOf(provart, toronto)

    # All fields are derived from Sources.yml; language, updated and
    # license are optional there.
    bar_attrs = {
        'dcterms:title': "BAR Arabidopsis AGI -> Annotation",
        'dcterms:description': "Most recent annotation for given AGI",
        'dcterms:language': "en-US",
        'dcterms:identifier':
            "http://bar.utoronto.ca/webservices/agiToAnnot.php",
        'dcterms:updated': "2015-04-17T09:44:56",
        'dcterms:license': "Creative Commons 3.0",
    }
    bar_source = doc.entity(ap['datasource1'], bar_attrs)
    doc.wasAttributedTo(bar_source, provart)

    # --- Source: TAIR --------------------------------------------------
    huala = add_person('eva_huala', "Eva Huala")
    phoenix = add_organization('phoenix_bioinformatics',
                               "Phoenix Bioinformatics")
    doc.actedOnBehalfOf(huala, phoenix)

    # dcterms:citation is an optional plain-text bibliographic citation.
    tair_attrs = {
        'dcterms:title': "TAIR",
        'dcterms:description': "The Arabidopsis Information Resource",
        'dcterms:language': "en-US",
        'dcterms:identifier': "https://www.arabidopsis.org/",
        'dcterms:citation':
            "The Arabidopsis Information Resource (TAIR): improved gene annotation and new tools. Nucleic Acids Research 2011 doi: 10.1093/nar/gkr1090",
    }
    tair_source = doc.entity(ap['datasource2'], tair_attrs)
    doc.wasAttributedTo(tair_source, huala)

    # The two sources are nested in Sources.yml; "derived from" is used
    # for simplicity in this prototype.
    doc.wasDerivedFrom(ap['datasource1'], ap['datasource2'])

    # --- Activity and response -----------------------------------------
    # One activity per ADAMA microservice type; only the query type is
    # modelled here.
    doc.activity(ap['do_query'], now())

    # The response returned to the user is the subject of the PROV record.
    response = doc.entity(ap['adama_response'])

    # Time-stamped chain: response <- query <- microservice <- datasource1.
    doc.wasGeneratedBy(response, ap['do_query'], now())
    doc.used(ap['do_query'], microservice, now())
    doc.used(microservice, bar_source, now())

    # --- Output --------------------------------------------------------
    print(doc.get_provn())
    print(doc.serialize())
    prov.dot.prov_to_dot(doc).write_png('Sources.png')
예제 #24
0
def w3c_publication_2():
    """Build the "w3c-publication2" example and return it as a ProvDocument.

    The records mirror the PROV-N listing published at
    https://github.com/lucmoreau/ProvToolbox/blob/master/asn/src/test/resources/prov/w3c-publication2.prov-asn

        bundle

        prefix ex <http://example.org/>
        prefix rec <http://example.org/record>

        prefix w3 <http://www.w3.org/TR/2011/>
        prefix hg <http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/>

        entity(hg:Overview.html, [ prov:type="file in hg" ])
        entity(w3:WD-prov-dm-20111215, [ prov:type="html4" ])

        activity(ex:rcp,-,-,[prov:type="copy directory"])

        wasGeneratedBy(rec:g; w3:WD-prov-dm-20111215, ex:rcp, -)

        entity(ex:req3, [ prov:type="http://www.w3.org/2005/08/01-transitions.html#pubreq" %% xsd:anyURI ])

        used(rec:u; ex:rcp,hg:Overview.html,-)
        used(ex:rcp, ex:req3, -)

        wasDerivedFrom(w3:WD-prov-dm-20111215, hg:Overview.html, ex:rcp, rec:g, rec:u)

        agent(ex:webmaster, [ prov:type='prov:Person' ])

        wasAssociatedWith(ex:rcp, ex:webmaster, -)

        endBundle
    """
    # One Namespace object per prefix declared in the listing.
    ex_ns = Namespace('ex', 'http://example.org/')
    rec_ns = Namespace('rec', 'http://example.org/record')
    w3_ns = Namespace('w3', 'http://www.w3.org/TR/2011/')
    hg_ns = Namespace(
        'hg',
        'http://dvcs.w3.org/hg/prov/raw-file/9628aaff6e20/model/releases/WD-prov-dm-20111215/'
    )

    # Prefixed-string identifiers that are referenced more than once below.
    draft_id = 'w3:WD-prov-dm-20111215'
    source_id = 'hg:Overview.html'
    copy_id = 'ex:rcp'
    request_id = 'ex:req3'
    webmaster_id = 'ex:webmaster'

    doc = ProvDocument()

    # The namespaces are never added to the document explicitly; the first
    # records use Namespace-indexed identifiers instead. Presumably that is
    # what lets the later 'prefix:name' strings resolve, so the statement
    # order is kept as-is -- TODO confirm against prov's namespace handling.
    doc.entity(hg_ns['Overview.html'], {'prov:type': "file in hg"})
    doc.entity(w3_ns['WD-prov-dm-20111215'], {'prov:type': "html4"})

    # No start or end time is given for the copy activity.
    doc.activity(ex_ns['rcp'],
                 other_attributes={'prov:type': "copy directory"})

    doc.wasGeneratedBy(draft_id, copy_id, identifier=rec_ns['g'])

    # The publication request is typed with a URI value, not a plain string.
    pubreq_type = Identifier(
        "http://www.w3.org/2005/08/01-transitions.html#pubreq")
    doc.entity(request_id, {'prov:type': pubreq_type})

    doc.used(copy_id, source_id, identifier='rec:u')
    doc.used(copy_id, request_id)

    # Ties the derivation to the generation (rec:g) and usage (rec:u) above.
    doc.wasDerivedFrom(draft_id,
                       source_id,
                       activity=copy_id,
                       generation='rec:g',
                       usage='rec:u')

    # NOTE(review): the reference listing types the agent as 'prov:Person',
    # whereas the plain string "Person" is passed here -- confirm intent.
    doc.agent(webmaster_id, {'prov:type': "Person"})

    doc.wasAssociatedWith(copy_id, webmaster_id)

    return doc
from prov.dot import prov_to_dot

import db

# Configure the root logger at DEBUG level and create this module's logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Relative path named "outputs".
output_path = Path("outputs")

# Provenance initialisation
# Namespaces: base URIs that the Namespace objects below are built from.
ns_type_uri = "https://openprovenance.org/ns/mimic#"
ns_data_uri = "https://mimic.physionet.org/mimiciii/1.4"

# Types: "mimic" maps to the type URI itself, "attrs" to the same URI with
# an "attr_" suffix appended.
ns_type = Namespace("mimic", ns_type_uri)
ns_attrs = Namespace("attrs", ns_type_uri + "attr_")


# Data
def gen_data_ns(name: str):
    """Return a Namespace whose prefix is *name* and whose URI is
    ``<ns_data_uri>/<name>/``.

    Written as a named function rather than a lambda bound to a name, so
    that it has a real ``__name__`` and a docstring.
    """
    return Namespace(name, f"{ns_data_uri}/{name}/")


# One data namespace per category; each prefix doubles as the URI segment.
ns_patients = gen_data_ns("patients")
ns_unit = gen_data_ns("units")
ns_staff = gen_data_ns("staff")
ns_process = gen_data_ns("process")
ns_admissions = gen_data_ns("admissions")
ns_stay = gen_data_ns("stay")
ns_procedures = gen_data_ns("procedures")

all_namespaces = [
    ns_type,
    ns_attrs,
예제 #26
0
def toW3Cprov(ling, bundl, format='w3c-prov-xml'):

    g = ProvDocument()
    vc = Namespace(
        "knmi", "http://knmi.nl"
    )  # namespaces do not need to be explicitly added to a document
    con = Namespace("dfp", "http://dispel4py.org")
    g.add_namespace("dcterms", "http://purl.org/dc/terms/")

    'specify bundle'
    bundle = None
    for trace in bundl:
        'specifing user'
        ag = g.agent(
            vc[trace["username"]],
            other_attributes={"dcterms:author": trace["username"]}
        )  # first time the ex namespace was used, it is added to the document automatically

        if trace['type'] == 'workflow_run':

            trace.update({'runId': trace['_id']})
            bundle = g.bundle(vc[trace["runId"]])
            bundle.actedOnBehalfOf(vc[trace["runId"]], vc[trace["username"]])

            dic = {}
            i = 0

            for key in trace:

                if key != "input":
                    if ':' in key:
                        dic.update({key: trace[key]})
                    else:
                        dic.update({vc[key]: trace[key]})

            dic.update({'prov:type': PROV['Bundle']})
            g.entity(vc[trace["runId"]], dic)

            dic = {}
            i = 0
            if type(trace['input']) != list:
                trace['input'] = [trace['input']]
            for y in trace['input']:
                for key in y:
                    if ':' in key:
                        dic.update({key: y[key]})
                    else:
                        dic.update({vc[key]: y[key]})
                dic.update({'prov:type': 'worklfow_input'})
                bundle.entity(vc[trace["_id"] + "_" + str(i)], dic)
                bundle.used(vc[trace["_id"]],
                            vc[trace["_id"] + "_" + str(i)],
                            identifier=vc["used_" + trace["_id"] + "_" +
                                          str(i)])
                i = i + 1

    'specify lineage'
    for trace in ling:

        #pprint(trace)

        try:
            bundle = g.bundle(vc[trace["runId"]])
            bundle.wasAttributedTo(vc[trace["runId"]],
                                   vc["ag_" + trace["username"]],
                                   identifier=vc["attr_" + trace["runId"]])

        except:
            pass
        'specifing creator of the activity (to be collected from the registy)'

        if 'creator' in trace:
            bundle.agent(
                vc["ag_" + trace["creator"]],
                other_attributes={"dcterms:creator": trace["creator"]}
            )  # first time the ex namespace was used, it is added to the document automatically
            bundle.wasAssociatedWith('process_' + trace["iterationId"],
                                     vc["ag_" + trace["creator"]])
            bundle.wasAttributedTo(vc[trace["runId"]],
                                   vc["ag_" + trace["creator"]])

        'adding activity information for lineage'
        dic = {}
        for key in trace:

            if type(trace[key]) != list:
                if ':' in key:
                    dic.update({key: trace[key]})
                else:

                    if key == 'location':

                        dic.update({"prov:location": trace[key]})
                    else:
                        dic.update({vc[key]: trace[key]})
        bundle.activity(vc["process_" + trace["iterationId"]],
                        trace["startTime"], trace["endTime"],
                        dic.update({'prov:type': trace["name"]}))

        'adding parameters to the document as input entities'
        dic = {}
        for x in trace["parameters"]:
            #print x
            if ':' in x["key"]:
                dic.update({x["key"]: x["val"]})
            else:
                dic.update({vc[x["key"]]: x["val"]})

        dic.update({'prov:type': 'parameters'})
        bundle.entity(vc["parameters_" + trace["instanceId"]], dic)
        bundle.used(vc['process_' + trace["iterationId"]],
                    vc["parameters_" + trace["instanceId"]],
                    identifier=vc["used_" + trace["iterationId"]])

        'adding input dependencies to the document as input entities'
        dic = {}

        for x in trace["derivationIds"]:
            'state could be added'
            #dic.update({'prov:type':'parameters'})
            bundle.used(vc['process_' + trace["iterationId"]],
                        vc[x["DerivedFromDatasetID"]],
                        identifier=vc["used_" + x["DerivedFromDatasetID"]])

        'adding entities to the document as output metadata'
        for x in trace["streams"]:
            i = 0
            parent_dic = {}
            for key in x:
                if key == 'con:immediateAccess':

                    parent_dic.update({vc['immediateAccess']: x[key]})

                elif key == 'location':

                    parent_dic.update({"prov:location": str(x[key])})
                else:
                    parent_dic.update({vc[key]: str(x[key])})

            c1 = bundle.collection(vc[x["id"]], other_attributes=parent_dic)
            bundle.wasGeneratedBy(vc[x["id"]],
                                  vc["process_" + trace["iterationId"]],
                                  identifier=vc["wgb_" + x["id"]])

            for d in trace['derivationIds']:
                bundle.wasDerivedFrom(vc[x["id"]],
                                      vc[d['DerivedFromDatasetID']],
                                      identifier=vc["wdf_" + x["id"]])

            for y in x["content"]:

                dic = {}

                if isinstance(y, dict):
                    val = None
                    for key in y:

                        try:
                            val = num(y[key])

                        except Exception, e:
                            val = str(y[key])

                        if ':' in key:
                            dic.update({key: val})
                        else:
                            dic.update({vc[key]: val})
                else:
                    dic = {vc['text']: y}

                dic.update({"verce:parent_entity": vc["data_" + x["id"]]})

                print x["id"]
                print str(i)
                print dic

                e1 = bundle.entity(vc["data_" + x["id"] + "_" + str(i)], dic)

                bundle.hadMember(c1, e1)
                bundle.wasGeneratedBy(vc["data_" + x["id"] + "_" + str(i)],
                                      vc["process_" + trace["iterationId"]],
                                      identifier=vc["wgb_" + x["id"] + "_" +
                                                    str(i)])

                for d in trace['derivationIds']:
                    bundle.wasDerivedFrom(
                        vc["data_" + x["id"] + "_" + str(i)],
                        vc[d['DerivedFromDatasetID']],
                        identifier=vc["wdf_" + "data_" + x["id"] + "_" +
                                      str(i)])

                i = i + 1