Example 1
 def __init__(
     self,
     research_object: "ResearchObject",
     full_name: str,
     host_provenance: bool,
     user_provenance: bool,
     orcid: str,
     fsaccess: StdFsAccess,
     run_uuid: Optional[uuid.UUID] = None,
 ) -> None:
     """Initialize the provenance profile."""
     self.fsaccess = fsaccess
     self.orcid = orcid
     self.research_object = research_object
     self.folder = self.research_object.folder
     self.document = ProvDocument()
     self.host_provenance = host_provenance
     self.user_provenance = user_provenance
     self.engine_uuid = research_object.engine_uuid  # type: str
     self.add_to_manifest = self.research_object.add_to_manifest
     if self.orcid:
         _logger.debug("[provenance] Creator ORCID: %s", self.orcid)
     self.full_name = full_name
     if self.full_name:
         _logger.debug("[provenance] Creator Full name: %s", self.full_name)
     self.workflow_run_uuid = run_uuid or uuid.uuid4()
     self.workflow_run_uri = self.workflow_run_uuid.urn  # type: str
     self.generate_prov_doc()
def calculate_flat_provenance_types(
    prov_doc: ProvDocument,
    to_level: int = 0,
    including_primitives_types: bool = True,
    counting_wdf_as_two: bool = False,
    ignored_types: Iterable[str] = (),
) -> MultiLevelTypeDict:
    # flatten all the bundles, if any
    prov_doc = prov_doc.flattened()

    # initialise index structures
    level0_types = defaultdict(set)  # type: Dict[QualifiedName, Set[QualifiedName]]
    predecessors = defaultdict(set)  # type: Dict[QualifiedName, Set[Tuple[QualifiedName, QualifiedName]]]

    types_to_ignore: FrozenSet[str] = frozenset(ignored_types)

    # indexing node types and relations
    for rec in prov_doc.get_records():  # type: ProvRecord
        if rec.is_element():
            level0_types[rec.identifier] |= get_element_types(
                rec, including_primitives_types, types_to_ignore)
        elif rec.is_relation():
            rel_type = rec.get_type()
            attrs, values = zip(*rec.formal_attributes)
            # expecting a QualifiedName from the first argument of a relation
            predecessor, successor = values[:2]
            if predecessor is not None and successor is not None:
                predecessors[successor].add((rel_type, predecessor))

    # the type map for this graph
    fp_types = defaultdict(dict)  # type: MultiLevelTypeDict
    # converting type sets to FlatProvenanceType level 0
    fp_types[0] = {
        node: (frozenset(level0_types[node]), )
        for node in level0_types
    }
    # propagating level-0 types to the specified level
    for k in range(1, to_level + 1):
        # only propagating (k-1) types from nodes that have them
        for node, types in fp_types[k - 1].items():
            # propagating the types to the predecessors
            for rel_type, predecessor in predecessors[node]:
                k_type = types + (frozenset({rel_type}),)  # type: FlatProvenanceType
                if counting_wdf_as_two and (rel_type == PROV_DERIVATION):
                    k_p1_type = k_type + (frozenset({rel_type}),)  # type: FlatProvenanceType
                    fp_types[k + 1][predecessor] = (
                        join_flat_types(fp_types[k + 1][predecessor], k_p1_type)
                        if predecessor in fp_types[k + 1]
                        else k_p1_type
                    )
                else:
                    fp_types[k][predecessor] = (
                        join_flat_types(fp_types[k][predecessor], k_type)
                        if predecessor in fp_types[k]
                        else k_type
                    )

    return fp_types
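A minimal usage sketch for calculate_flat_provenance_types above, assuming its helpers (get_element_types, join_flat_types) and type aliases are importable from the same module; the namespace and entity names are illustrative.

from prov.model import ProvDocument

doc = ProvDocument()
doc.add_namespace("ex", "http://example.org/")
doc.entity("ex:d1")
doc.entity("ex:d2")
doc.wasDerivedFrom("ex:d2", "ex:d1")  # ex:d2 was derived from ex:d1

# level-0 types for every node, plus one level of propagation along relations
fp_types = calculate_flat_provenance_types(doc, to_level=1)
for node, types in fp_types[0].items():
    print(node, types)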
Example 3
def read(source, format=None):
    """
    Convenience function returning a ProvDocument instance.

    It does lazy format detection by simply using try/except for all known
    formats. The deserializers should fail fairly early when data of the
    wrong type is passed to them, so the try/except is likely cheap. One
    could of course also do some more advanced format auto-detection, but I am
    not sure that is necessary.

    The downside is that no proper error messages will be produced; use the
    format parameter to get the actual traceback.
    """
    # Lazy imports so as not to clobber the namespace.
    from prov.model import ProvDocument

    from prov.serializers import Registry
    Registry.load_serializers()
    serializers = Registry.serializers.keys()

    if format:
        return ProvDocument.deserialize(source=source, format=format.lower())

    for format in serializers:
        try:
            return ProvDocument.deserialize(source=source, format=format)
        except:
            pass
    else:
        raise TypeError("Could not read from the source. To get a proper "
                        "error message, specify the format with the 'format' "
                        "parameter.")
def base_connector_record_parameter_example():
    """
    Returns a dict with attributes and metadata for a simple node

    :return: dict with attributes and metadata
    :rtype: dict
    """
    doc = ProvDocument()

    namespaces = dict()
    namespaces.update({"ex": "http://example.com"})
    namespaces.update({"custom": "http://custom.com"})

    type_map = dict()
    type_map.update({"int value": "int"})
    type_map.update({"date value": "xds:datetime"})

    metadata = dict()

    metadata.update(
        {METADATA_KEY_PROV_TYPE: doc.valid_qualified_name("prov:Activity")})
    metadata.update({
        METADATA_KEY_IDENTIFIER:
        doc.valid_qualified_name("prov:example_node")
    })
    metadata.update({METADATA_KEY_TYPE_MAP: type_map})
    metadata.update({METADATA_KEY_NAMESPACES: namespaces})

    return_data = dict()
    return_data.update({"attributes": attributes_dict_example()})
    return_data.update({"metadata": metadata})

    return return_data
    def __init__(self, database_helper, full_provenance=False):
        """
        Initializes the provenance for the mjclawar_rarshad project

        Parameters
        ----------
        database_helper: DatabaseHelper
        full_provenance: bool

        Returns
        -------
        """
        assert isinstance(database_helper, DatabaseHelper)

        self.database_helper = database_helper
        if full_provenance:
            self.prov_doc = ProvDocument.deserialize(dir_info.plan_json)
        else:
            self.prov_doc = ProvDocument()
        self.prov_doc.add_namespace(mcras.BDP_NAMESPACE.name,
                                    mcras.BDP_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.ALG_NAMESPACE.name,
                                    mcras.ALG_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.DAT_NAMESPACE.name,
                                    mcras.DAT_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.LOG_NAMESPACE.name,
                                    mcras.LOG_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.ONT_NAMESPACE.name,
                                    mcras.ONT_NAMESPACE.link)
Example 6
def enforce_uniqueness_constraints(graph: ProvDocument) -> ProvDocument:
    """Enforce model uniqueness constraints.

    Remove node duplicates:
        - ProvDocument.unified takes care of this by removing nodes with
        the same id.

    Remove relation duplicates:
        - Allow only one relation of a certain type between two nodes.

    Enforcing this constraint after having populated the model instead of
    during population simplifies the model creation.
    """
    records, known = [], set()

    for relation in graph.get_records(ProvRelation):
        (_, source), (_, target) = relation.formal_attributes[:2]
        rel_tuple = (type(relation), source, target)
        if rel_tuple in known:
            continue
        known.add(rel_tuple)
        records.append(relation)

    records.extend(graph.get_records(ProvElement))

    g = ProvDocument(records)
    return g.unified()
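A hedged usage sketch for enforce_uniqueness_constraints above; the entity and activity names are made up, and the assert reflects the two elements plus the single surviving relation.

from prov.model import ProvDocument

doc = ProvDocument()
doc.set_default_namespace("http://example.org/")
doc.entity("e1")
doc.activity("a1")
doc.wasGeneratedBy("e1", "a1")  # the same generation relation ...
doc.wasGeneratedBy("e1", "a1")  # ... recorded twice

deduplicated = enforce_uniqueness_constraints(doc)
assert len(deduplicated.get_records()) == 3  # e1, a1 and one wasGeneratedBy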
 def test_namespace_inheritance(self):
     prov_doc = ProvDocument()
     prov_doc.add_namespace('ex', 'http://www.example.org/')
     bundle = prov_doc.bundle('ex:bundle')
     e1 = bundle.entity('ex:e1')
     self.assertIsNotNone(e1.identifier, "e1's identifier is None!")
     self.do_tests(prov_doc)
Example 8
def get_provdoc(format, infile):
    if format == "json":
        return ProvDocument.deserialize(infile)
    elif format == "xml":
        return ProvDocument.deserialize(infile, format='xml')
    else:
        print "Error: unsupported format (xml and json are supported"
Example 9
 def test_namespace_inheritance(self):
     prov_doc = ProvDocument()
     prov_doc.add_namespace('ex', 'http://www.example.org/')
     bundle = prov_doc.bundle('ex:bundle')
     e1 = bundle.entity('ex:e1')
     self.assertIsNotNone(e1.identifier, "e1's identifier is None!")
     self.do_tests(prov_doc)
Example 10
def get_provdoc(format, infile):
    if format == "json":
        return ProvDocument.deserialize(infile)
    elif format == "xml":
        return ProvDocument.deserialize(infile, format='xml')
    else:
        print("Error: unsupported format (only xml and json are supported)")
Example 11
 def test_default_namespace_inheritance(self):
     prov_doc = ProvDocument()
     prov_doc.set_default_namespace('http://www.example.org/')
     bundle = prov_doc.bundle('bundle')
     e1 = bundle.entity('e1')
     self.assertIsNotNone(e1.identifier, "e1's identifier is None!")
     self.assertRoundTripEquivalence(prov_doc)
Example 12
def diff(diff: DiffModel, document: provo.ProvDocument):
    print_msg("  Exporting module dependency comparison")
    added, removed, replaced = diff.modules

    for module in added:  # type: Module
        _create_module_dep(module, document, suffix="_a")
        document.wasGeneratedBy("module{}_a".format(module.id),
                                "trial{}Execution".format(diff.trial2.id), None,
                                "module{}AddDep".format(module.id),
                                [(provo.PROV_ROLE, "dependencyAddition")])

    for module in removed:  # type: Module
        _create_module_dep(module, document, suffix="_r")
        document.wasInvalidatedBy("module{}_r".format(module.id),
                                  "trial{}Execution".format(diff.trial2.id), None,
                                  "module{}RemoveDep".format(module.id),
                                  [(provo.PROV_ROLE, "dependencyRemoval")])

    for (mod_removed, mod_added) in replaced:  # type: Module
        _create_module_dep(mod_added, document, suffix="_a")
        document.wasGeneratedBy("module{}_a".format(mod_added.id),
                                "trial{}Execution".format(diff.trial2.id), None,
                                "module{}AddDep".format(mod_added.id),
                                [(provo.PROV_ROLE, "dependencyAddition")])

        _create_module_dep(mod_removed, document, suffix="_r")
        document.wasInvalidatedBy("module{}_r".format(mod_removed.id),
                                  "trial{}Execution".format(diff.trial2.id), None,
                                  "module{}RemoveDep".format(mod_removed.id),
                                  [(provo.PROV_ROLE, "dependencyRemoval")])

        document.wasRevisionOf("module{}_a".format(mod_added.id),
                               "module{}_r".format(mod_removed.id),
                               "trial{}Execution".format(diff.trial2.id), None, None, None,
                               [(provo.PROV_TYPE, "dependencyReplacement")])
Example 13
 def test_default_namespace_inheritance(self):
     prov_doc = ProvDocument()
     prov_doc.set_default_namespace("http://www.example.org/")
     bundle = prov_doc.bundle("bundle")
     e1 = bundle.entity("e1")
     self.assertIsNotNone(e1.identifier, "e1's identifier is None!")
     self.do_tests(prov_doc)
Example 14
    def parse(self):
        """ 
        Parse a result directory to extract the pieces information to be 
        stored in NIDM-Results. 
        """
        # Methods: find_software, find_model_fitting, find_contrasts and
        # find_inferences should be defined in the children classes and return
        # a list of NIDM Objects as specified in the objects module

        # Object of type Software describing the neuroimaging software package
        # used for the analysis
        self.software = self._find_software()

        # List of objects of type ModelFitting describing the model fitting
        # step in NIDM-Results (main activity: Model Parameters Estimation)
        self.model_fittings = self._find_model_fitting()

        # Dictionary of (key, value) pairs where key is a tuple
        # containing the identifier of a ModelParametersEstimation object and a
        # tuple of identifiers of ParameterEstimateMap objects and value is an
        # object of type Contrast describing the contrast estimation step in
        # NIDM-Results (main activity: Contrast Estimation)
        self.contrasts = self._find_contrasts()

        # Inference activity and entities
        # Dictionary of (key, value) pairs where key is the identifier of a
        # ContrastEstimation object and value is an object of type Inference
        # describing the inference step in NIDM-Results (main activity:
        # Inference)
        self.inferences = self._find_inferences()

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()
Example 15
    def get_document_as_prov(self, document_id=None):
        """
        Get a ProvDocument from the database based on the document_id
        :param document_id: The id as a string value
        :return: ProvDocument
        """
        if type(document_id) is not str:
            raise InvalidArgumentTypeException()

        raw_doc = self._adapter.get_document(document_id)

        # parse document
        prov_document = ProvDocument()
        for record in raw_doc.document.records:
            self._parse_record(prov_document, record)

        for bundle in raw_doc.bundles:
            prefixed_identifier = bundle.bundle_record.metadata[
                METADATA_KEY_IDENTIFIER]
            # remove prefix
            identifier = prefixed_identifier[
                len(PROV_API_BUNDLE_IDENTIFIER_PREFIX) - 2:]
            prov_bundle = prov_document.bundle(identifier=identifier)

            for record in bundle.records:
                self._parse_record(prov_bundle, record)
        return prov_document
Example 16
    def deriveDependency(self, aDO, aRO, derivedList):

        d1 = ProvDocument()  # d1 is now an empty provenance document
        d1.add_namespace("dt", "http://cs.ncl.ac.uk/dtsim/")
        e1 = d1.entity(DTns + aRO.id)  # deriving
        ag1 = d1.agent(DTns + str(aDO.id))
        for der in derivedList:
            # create provlet
            e2 = d1.entity(DTns + der.id)  # derived
            d1.wasAttributedTo(e2, ag1)
            d1.wasDerivedFrom(e2, e1)

            # update upstream pointer
            der.upstream = [(aRO, None)]  # aRO is upstream of der, with no activity

            # update downstream
            aRO.downstream.append((der, None))  # der is downstream of aRO, with no activity

        # update global graph
        e1 = pGlobal.entity(DTns + aRO.id)  # deriving
        ag1 = pGlobal.agent(DTns + str(aDO.id))
        pGlobal.wasAttributedTo(e2, ag1)
        for der in derivedList:
            e2 = pGlobal.entity(DTns + der.id)  # derived
            pGlobal.wasDerivedFrom(e2, e1)

        # trigger credit recomputation
        for der in derivedList:
            # aRO needs its credit updated with aRO1.credit
            aCreditManager.addDerivationCredit(aRO, der.currentTotalCredit)

        # 		self.notify(d1)
        return d1
Example 17
def primer():
    a = ProvDocument()
    script_path = os.path.dirname(os.path.abspath(__file__))
    with open(str(script_path) + "/output.json") as json_file:
        line = json_file.readline()
        a = a.deserialize(content=line)
    return a
Example 18
def primer():
    a = ProvDocument()
    script_path = os.path.dirname(os.path.abspath(__file__))
    with open(str(script_path) + "/output.json") as json_file:
        line = json_file.readline()
        a = a.deserialize(content=line)
    return a
Example 19
def provRead(source, format=None):
    from prov.model import ProvDocument
    from prov.serializers import Registry

    Registry.load_serializers()
    serializers = Registry.serializers.keys()

    if format:
        try:
            ret = ProvDocument.deserialize(source=source,
                                           format=format.lower())
            return ret
        except Exception as e:
            log.error(e)
            raise TypeError(e)

    for format in serializers:
        source.seek(0)
        try:
            return ProvDocument.deserialize(source=source, format=format)
        except:
            pass
    else:
        raise TypeError("Could not read from the source. To get a proper "
                        "error message, specify the format with the 'format' "
                        "parameter.")
Example 20
def add_parents(graph: ProvDocument, package: CommitModelPackage) -> ProvDocument:
    """Add link between commit activities and their parents."""
    commit = package.commit
    for parent in package.parent_commits:
        graph.activity(*parent)
        graph.activity(*commit)
        graph.wasInformedBy(commit.id, parent.id)
    return graph
Example 21
File: prov.py Project: jvfe/BioProv
    def __init__(
        self,
        project,
        add_attributes=False,
        add_users=True,
        _add_project_namespaces=True,
        _iter_samples=True,
        _iter_project=True,
    ):
        """
        Constructs the W3C-PROV document for a project.

        :param Project project: instance of bioprov.src.Project.
        :param bool add_attributes: whether to add object attributes.
        :param bool add_users: whether to add users and environments.
        :param bool _add_project_namespaces:
        :param bool _iter_samples:
        :param bool _iter_project:
        """

        # Assert Project is good before constructing instance
        assert isinstance(project, Project), Warnings()["incorrect_type"](project, Project)
        self.ProvDocument = ProvDocument()
        self.project = project
        self.project.document = self.ProvDocument
        self._dot = prov_to_dot(self.ProvDocument)
        self._provn = self.ProvDocument.get_provn()
        self._entities = dict()
        self._activities = dict()
        self._agents = dict()
        self._user_bundles = dict()
        self._provstore_document = None

        # Don't add attributes if you plan on exporting to graphic format
        self.add_attributes = add_attributes

        # Set this before running Namespaces
        if add_users:
            self._create_envs_and_users = True

        else:
            self._create_envs_and_users = False

        # Default actions to create the document
        if _add_project_namespaces:
            self._add_project_namespaces()

        if self._create_envs_and_users:
            self._iter_envs_and_users()

        if _iter_project:
            self._iter_project()

        if _iter_samples:
            self._iter_samples()
Example 22
    def test_xsd_qnames(self):
        prov_doc = ProvDocument()
        ex = Namespace('ex', 'http://www.example.org')
        prov_doc.add_namespace(ex)

        an_xsd_qname = XSDQName(ex['a_value'])
        prov_doc.entity('ex:e1', {'prov:value': an_xsd_qname})

        self.assertPROVJSONRoundTripEquivalence(prov_doc)
Example 23
def document_with_n_bundles_having_default_namespace(n):
    prov_doc = ProvDocument()
    prov_doc.add_namespace("ex", "http://www.example.org/")
    for i in range(n):
        x = str(i + 1)
        bundle = prov_doc.bundle("ex:bundle/" + x)
        bundle.set_default_namespace("http://www.example.org/default/" + x)
        bundle.entity("e")
    return prov_doc
Example 24
def add_commit(graph: ProvDocument, package: CommitModelPackage) -> ProvDocument:
    """Add commit activity, agents for author and committer, relations between agents and activity."""
    author, committer, commit = package.author, package.committer, package.commit
    graph.agent(*author)
    graph.agent(*committer)
    graph.activity(*commit)
    graph.wasAssociatedWith(commit.id, author.id)
    graph.wasAssociatedWith(commit.id, committer.id)
    return graph
Example 25
def diff(diff: DiffModel, document: provo.ProvDocument):
    print_msg("  Exporting basic trial comparison information")
    _create_trial_info(document, diff.trial1, "_{}".format(diff.trial1.id))
    _create_trial_info(document, diff.trial2, "_{}".format(diff.trial2.id))

    document.wasInfluencedBy("trial{}Execution".format(diff.trial2.id),
                             "trial{}Execution".format(diff.trial1.id),
                             "trial{}ComparedTo{}".format(diff.trial2.id, diff.trial1.id),
                             [(provo.PROV_TYPE, "comparison")])
Example 26
def document_with_n_bundles_having_default_namespace(n):
    prov_doc = ProvDocument()
    prov_doc.add_namespace('ex', 'http://www.example.org/')
    for i in range(n):
        x = str(i + 1)
        bundle = prov_doc.bundle('ex:bundle/' + x)
        bundle.set_default_namespace('http://www.example.org/default/' + x)
        bundle.entity('e')
    return prov_doc
Example 27
def document_with_n_bundles_having_default_namespace(n):
    prov_doc = ProvDocument()
    prov_doc.add_namespace('ex', 'http://www.example.org/')
    for i in range(n):
        x = str(i + 1)
        bundle = prov_doc.bundle('ex:bundle/' + x)
        bundle.set_default_namespace('http://www.example.org/default/' + x)
        bundle.entity('e')
    return prov_doc
Example 28
def collections():
    g = ProvDocument()
    ex = Namespace('ex', 'http://example.org/')

    c1 = g.collection(ex['c1'])
    e1 = g.entity('ex:e1')
    g.hadMember(c1, e1)

    return g
def save_provenance(prov_doc: ProvDocument, filepath: Path):
    logging.debug("Saving provenance files:")
    logging.debug(" - %s", filepath)
    with filepath.open("w") as f:
        prov_doc.serialize(f)
    provn_content = prov_doc.get_provn()
    filepath = filepath.with_suffix(".provn")
    logging.debug(" - %s", filepath)
    with filepath.open("w") as f:
        f.write(provn_content)
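A short sketch combining the two helpers above, assuming collections() and save_provenance() are both in scope; the output path is illustrative (a .provn twin file is written next to it).

from pathlib import Path

doc = collections()                             # the small example document above
save_provenance(doc, Path("collections.json"))  # also writes collections.provn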
Example 30
def long_literals():
    g = ProvDocument()

    long_uri = "http://Lorem.ipsum/dolor/sit/amet/consectetur/adipiscing/elit/Quisque/vel/sollicitudin/felis/nec/venenatis/massa/Aenean/lectus/arcu/sagittis/sit/amet/nisl/nec/varius/eleifend/sem/In/hac/habitasse/platea/dictumst/Aliquam/eget/fermentum/enim/Curabitur/auctor/elit/non/ipsum/interdum/at/orci/aliquam/"
    ex = Namespace('ex', long_uri)
    g.add_namespace(ex)

    g.entity('ex:e1', {'prov:label': 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec pellentesque luctus nulla vel ullamcorper. Donec sit amet ligula sit amet lorem pretium rhoncus vel vel lorem. Sed at consequat metus, eget eleifend massa. Fusce a facilisis turpis. Lorem volutpat.'})

    return g
Example 31
def get_esmvaltool_provenance():
    """Create an esmvaltool run activity."""
    provenance = ProvDocument()
    namespace = 'software'
    create_namespace(provenance, namespace)
    attributes = {}  # TODO: add dependencies with versions here
    activity = provenance.activity(namespace + ':esmvaltool==' + __version__,
                                   other_attributes=attributes)

    return activity
Example 32
    def start(self, workflow=False):
        from daops import __version__ as daops_version
        from flamingo import __version__ as flamingo_version

        self.doc = ProvDocument()
        # Declaring namespaces for various prefixes
        self.doc.set_default_namespace(uri="http://purl.org/roocs/prov#")
        self.doc.add_namespace("prov", uri="http://www.w3.org/ns/prov#")
        self.doc.add_namespace(
            "provone",
            uri="http://purl.dataone.org/provone/2015/01/15/ontology#")
        self.doc.add_namespace("dcterms", uri="http://purl.org/dc/terms/")
        # Define entities
        project_cds = self.doc.agent(
            ":copernicus_CDS",
            {
                "prov:type": "prov:Organization",
                "dcterms:title": "Copernicus Climate Data Store",
            },
        )
        self.sw_flamingo = self.doc.agent(
            ":flamingo",
            {
                "prov:type": "prov:SoftwareAgent",
                "dcterms:source": f"https://github.com/cedadev/flamingo/releases/tag/v{flamingo_version}",
            },
        )
        self.doc.wasAttributedTo(self.sw_flamingo, project_cds)
        self.sw_daops = self.doc.agent(
            ":daops",
            {
                "prov:type": "prov:SoftwareAgent",
                "dcterms:source": f"https://github.com/roocs/daops/releases/tag/v{daops_version}",
            },
        )
        # workflow
        if workflow is True:
            self.workflow = self.doc.entity(":workflow",
                                            {"prov:type": "provone:Workflow"})
            orchestrate = self.doc.activity(
                ":orchestrate",
                other_attributes={
                    "prov:startedAtTime": "2020-11-26T09:15:00",
                    "prov:endedAtTime": "2020-11-26T09:30:00",
                },
            )
            self.doc.wasAssociatedWith(orchestrate,
                                       agent=self.sw_flamingo,
                                       plan=self.workflow)
Example 33
    def __init__(self, version, out_dir, zipped=True):
        out_dirname = os.path.basename(out_dir)
        out_path = os.path.dirname(out_dir)

        # Create output path from output name
        self.zipped = zipped
        if not self.zipped:
            out_dirname = out_dirname + ".nidm"
        else:
            out_dirname = out_dirname + ".nidm.zip"
        out_dir = os.path.join(out_path, out_dirname)

        # Quit if output path already exists and user doesn't want to overwrite
        # it
        if os.path.exists(out_dir):
            msg = out_dir + " already exists, overwrite?"
            if not input("%s (y/N) " % msg).lower() == 'y':
                quit("Bye.")
            if os.path.isdir(out_dir):
                shutil.rmtree(out_dir)
            else:
                os.remove(out_dir)
        self.out_dir = out_dir

        if version == "dev":
            self.version = {
                'major': 10000,
                'minor': 0,
                'revision': 0,
                'num': version
            }
        else:
            major, minor, revision = version.split(".")
            if "-rc" in revision:
                revision, rc = revision.split("-rc")
            else:
                rc = -1
            self.version = {
                'major': int(major),
                'minor': int(minor),
                'revision': int(revision),
                'rc': int(rc),
                'num': version
            }

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()

        # A temp directory that will contain the exported data
        self.export_dir = tempfile.mkdtemp(prefix="nidm-", dir=out_path)

        self.prepend_path = ''
Example 34
    def test_get_element_invalid(self):
        """
        Test get element with error

        """

        with self.assertRaises(InvalidArgumentTypeException):
            self.provapi.get_element(None)

        with self.assertRaises(NotFoundException):
            doc = ProvDocument()
            name = doc.valid_qualified_name("prov:Some unused name")
            self.provapi.get_element(name)
Example 35
def prov_db_unknown_prov_typ_example():
    doc = ProvDocument()
    doc.add_namespace("ex", "https://example.com")
    doc.entity(identifier="ex:Entity1")
    doc.entity(identifier="ex:Entity2")
    doc.influence(influencee="ex:Entity1", influencer="ex:Entity2")
    return doc
Example 36
    def test_loading_all_json(self):
        # self.assertFalse(fails, 'Failed to load/round-trip %d JSON files (%s)' % (len(fails), ', '.join(fails)))

        # Code for debugging the failed tests
        for filename in self.fails:
            # Reload the failed files
            filepath = self.json_path + filename
#             os.rename(json_path + filename, json_path + filename + '-fail')
            with open(filepath) as json_file:
                logger.info("Loading %s...", filepath)
                g1 = ProvDocument.deserialize(json_file)
                json_str = g1.serialize(indent=4)
                g2 = ProvDocument.deserialize(content=json_str)
                self.assertEqual(g1, g2, 'Round-trip JSON encoding/decoding failed:  %s.' % filename)
Example 37
def base_connector_relation_parameter_example():
    doc = ProvDocument()
    doc.add_namespace("ex", "http://example.com")
    doc.add_namespace("custom", "http://custom.com")

    namespaces = dict()
    namespaces.update({"ex": "http://example.com"})
    namespaces.update({"custom": "http://custom.com"})

    type_map = dict()
    type_map.update({"int value": "int"})
    type_map.update({"date value": "xds:datetime"})

    metadata = dict()

    metadata.update({METADATA_KEY_PROV_TYPE: PROV_RECORD_IDS_MAP["mentionOf"]})
    metadata.update({METADATA_KEY_IDENTIFIER: "identifier for the relation"})
    metadata.update({METADATA_KEY_TYPE_MAP: type_map})
    metadata.update({METADATA_KEY_NAMESPACES: namespaces})

    return_data = dict()
    return_data.update({"attributes": attributes_dict_example()})
    return_data.update({"metadata": metadata})
    return_data.update({"from_node": doc.valid_qualified_name("ex:Yoda")})
    return_data.update(
        {"to_node": doc.valid_qualified_name("ex:Luke Skywalker")})
    return_data.update({"doc": doc})

    return return_data
class ProjectProvenance:
    def __init__(self, database_helper, full_provenance=False):
        """
        Initializes the provenance for the mjclawar_rarshad project

        Parameters
        ----------
        database_helper: DatabaseHelper
        full_provenance: bool

        Returns
        -------
        """
        assert isinstance(database_helper, DatabaseHelper)

        self.database_helper = database_helper
        if full_provenance:
            self.prov_doc = ProvDocument.deserialize(dir_info.plan_json)
        else:
            self.prov_doc = ProvDocument()
        self.prov_doc.add_namespace(mcras.BDP_NAMESPACE.name, mcras.BDP_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.ALG_NAMESPACE.name, mcras.ALG_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.DAT_NAMESPACE.name, mcras.DAT_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.LOG_NAMESPACE.name, mcras.LOG_NAMESPACE.link)
        self.prov_doc.add_namespace(mcras.ONT_NAMESPACE.name, mcras.ONT_NAMESPACE.link)

    def write_provenance_json(self):
        self.prov_doc.serialize(dir_info.plan_json)
Example 39
def test_references(tmp_path, monkeypatch):
    """Test1: references are replaced with bibtex."""
    # Create fake provenance
    provenance = ProvDocument()
    provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file')
    provenance.add_namespace('attribute',
                             uri=ESMVALTOOL_URI_PREFIX + 'attribute')
    filename = str(tmp_path / 'output.nc')
    attributes = {
        'attribute:references': 'test_tag',
        'attribute:script_file': 'diagnostics.py'
    }
    provenance.entity('file:' + filename, attributes)

    # Create fake bibtex references tag file
    references_path = tmp_path / 'references'
    references_path.mkdir()
    monkeypatch.setattr(esmvalcore._citation.DIAGNOSTICS, 'path', tmp_path)
    fake_bibtex_file = references_path / 'test_tag.bibtex'
    fake_bibtex = "Fake bibtex file content\n"
    fake_bibtex_file.write_text(fake_bibtex)

    _write_citation_files(filename, provenance)
    citation_file = tmp_path / 'output_citation.bibtex'
    citation = citation_file.read_text()
    assert citation == '\n'.join([ESMVALTOOL_PAPER, fake_bibtex])
Example 40
    def test_xsd_qnames(self):
        prov_doc = ProvDocument()
        ex = Namespace('ex', 'http://www.example.org/')
        prov_doc.add_namespace(ex)
        ex1 = Namespace('ex1', 'http://www.example1.org/')  # ex1 is not added to the document

        an_xsd_qname = XSDQName(ex['a_value'])
        another_xsd_qname = XSDQName(ex1['another_value'])

        e1 = prov_doc.entity('ex:e1', {'prov:value': an_xsd_qname, 'prov:type': another_xsd_qname})
        for _, attr_value in e1.attributes:
            self.assertIsInstance(attr_value, XSDQName)

        self.assertRoundTripEquivalence(prov_doc)
Example 41
 def setUp(self):
     self.json_path = os.path.dirname(os.path.abspath(__file__)) + '/json/'
     filenames = os.listdir(self.json_path)
     self.fails = []
     for filename in filenames:
         if filename.endswith('.json'):
             with open(self.json_path + filename) as json_file:
                 try:
                     g1 = ProvDocument.deserialize(json_file)
                     json_str = g1.serialize(indent=4)
                     g2 = ProvDocument.deserialize(content=json_str)
                     self.assertEqual(g1, g2, 'Round-trip JSON encoding/decoding failed:  %s.' % filename)
                 except:
                     self.fails.append(filename)
Example 42
def test_cmip6_data_citation_url(tmp_path):
    """Test3: CMIP6 info_url is retrieved from ES-DOC."""
    # Create fake provenance
    provenance = ProvDocument()
    provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file')
    provenance.add_namespace('attribute',
                             uri=ESMVALTOOL_URI_PREFIX + 'attribute')
    attributes = {
        'attribute:mip_era': 'CMIP6',
        'attribute:activity_id': 'activity',
        'attribute:institution_id': 'institution',
        'attribute:source_id': 'source',
        'attribute:experiment_id': 'experiment',
    }
    filename = str(tmp_path / 'output.nc')
    provenance.entity('file:' + filename, attributes)
    _write_citation_files(filename, provenance)
    citation_url = tmp_path / 'output_data_citation_info.txt'

    # Create fake info url
    fake_url_prefix = '.'.join(attributes.values())
    text = '\n'.join([
        "Follow the links below to find more information about CMIP6 data:",
        f"- {CMIP6_URL_STEM}/cmip6?input={fake_url_prefix}",
        '',
    ])
    assert citation_url.read_text() == text
Example 43
    def test_bundle_update_simple(self):
        doc = ProvDocument()
        doc.set_default_namespace(EX_URI)

        b1 = doc.bundle('b1')
        b1.entity('e')

        b2 = doc.bundle('b2')
        b2.entity('e')

        self.assertRaises(ProvException, lambda: b1.update(1))
        self.assertRaises(ProvException, lambda: b1.update(doc))

        b1.update(b2)
        self.assertEqual(len(b1.get_records()), 2)
Example 44
def main(auth_json_path, full_provenance=False):
    with open(auth_json_path, 'r') as f:
        auth_json = json.load(f)
        api_token = auth_json['services']['cityofbostondataportal']['token']
        username = '******'#auth_json['services']['cityofbostondataportal']['username']
        mongo_pass = '******' #auth_json['services']['cityofbostondataportal']['username']

    database_helper = database_helpers.DatabaseHelper(username=username, password=mongo_pass)
    bdp_api = bdp_query.BDPQuery(api_token=api_token)

    if full_provenance:
        with open(plan_json, 'w') as f:
            f.write(json.dumps({}))

    setup_crime_incidents(database_helper, bdp_api, full_provenance=full_provenance)
    setup_property_assessment(database_helper, bdp_api, full_provenance=full_provenance)
    setup_boston_public_schools(database_helper, bdp_api, full_provenance=full_provenance)
    setup_hospital_locations(database_helper, bdp_api, full_provenance=full_provenance)
    setup_crime_centroids(database_helper, full_provenance=full_provenance)
    setup_hospital_distances(database_helper, full_provenance=full_provenance)
    setup_crime_knn(database_helper, full_provenance=full_provenance)
    setup_home_value_model(database_helper, full_provenance=full_provenance)
    setup_hospital_scatter(database_helper, full_provenance=full_provenance)
    setup_school_distances(database_helper, full_provenance=full_provenance)
    setup_school_scatter(database_helper, full_provenance=full_provenance)

    if full_provenance:
        with open(plan_json, 'r') as f:
            prov_doc = ProvDocument.deserialize(f)
            dot = prov_to_dot(prov_doc)
            dot.write_svg(prov_svg)
Example 45
def datatypes():
    g = ProvDocument()
    ex = Namespace('ex', 'http://example.org/')
    g.add_namespace(ex)

    attributes = {'ex:int': 100,
                  'ex:float': 100.123456,
                  'ex:long': 123456789000,
                  'ex:bool': True,
                  'ex:str': 'Some string',
                  'ex:unicode': u'Some unicode string with accents: Huỳnh Trung Đông',
                  'ex:timedate': datetime.datetime(2012, 12, 12, 14, 7, 48),
                  'ex:intstr': Literal("PROV Internationalized string", PROV["InternationalizedString"], "en"),
    }
    multiline = """Line1
    Line2
Line3"""
    attributes['ex:multi-line'] = multiline
    g.entity('ex:e1', attributes)
    return g
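A possible follow-up to datatypes() above, just to show a PROV-JSON round trip; nothing here is specific to the original project.

from prov.model import ProvDocument

g1 = datatypes()
json_str = g1.serialize(indent=2)
g2 = ProvDocument.deserialize(content=json_str)
assert g1 == g2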
Example 46
 def prov(self, format='json', filename=None):
     if self.prov_url is None:
         raise APIException('no provenance information found')
     response = self.adama.utils.request(self.prov_url, format=format)
     if format in ('json', 'sources'):
         return response.json()
     elif format == 'prov-n':
         return response.text
     elif format == 'prov':
         return ProvDocument.deserialize(
             content=json.dumps(response.json()))
     elif format == 'png':
         return png(response.content, filename)
Example 47
    def get_bundle(self, document_id, bundle_id, prov_format=ProvDocument):
        if prov_format == ProvDocument:
            extension = 'json'
        else:
            extension = prov_format

        r = self._request('get', "/documents/%i/bundles/%i.%s" % (document_id, bundle_id, extension),
                          headers=self.headers)

        if prov_format == ProvDocument:
            return ProvDocument.deserialize(content=r.content)
        else:
            return r.content
Example 48
    def get_document(self, doc_id, format=None, flattened=False, view=None):
        """Returns a ProvBundle object of the document with the ID provided or raises ApiNotFoundError"""

        extension = format if format is not None else 'json'
        view = "/views/%s" % view if view in ['data', 'process', 'responsibility'] else ""
        url = "documents/%d%s%s.%s" % (doc_id, "/flattened" if flattened else "", view, extension)
        response = self.request(url, raw=True)

        if format is None:
            # Try to decode it as a ProvDocument
            result = ProvDocument.deserialize(content=response)
        else:
            # return the raw response
            result = response
        return result
Example 49
 def testAllExamples(self):
     num_graphs = len(examples.tests)
     logger.info('PROV-JSON round-trip testing %d example provenance graphs', num_graphs)
     counter = 0
     for name, graph in examples.tests:
         counter += 1
         logger.info('%d. Testing the %s example', counter, name)
         g1 = graph()
         logger.debug('Original graph in PROV-N\n%s', g1.get_provn())
         # json_str = g1.get_provjson(indent=4)
         json_str = g1.serialize(indent=4)
         logger.debug('Original graph in PROV-JSON\n%s', json_str)
         g2 = ProvDocument.deserialize(content=json_str)
         logger.debug('Graph decoded from PROV-JSON\n%s', g2.get_provn())
         self.assertEqual(g1, g2, 'Round-trip JSON encoding/decoding failed:  %s.' % name)
Example 50
 def test_unifying(self):
     # This is a very trivial test just to exercise the unified() function
     # TODO: Create a proper unification test
     json_path = os.path.dirname(os.path.abspath(__file__)) + '/unification/'
     filenames = os.listdir(json_path)
     for filename in filenames:
         if not filename.endswith('.json'):
             continue
         filepath = json_path + filename
         with open(filepath) as json_file:
             logger.info('Testing unifying: %s', filename)
             logger.debug("Loading %s...", filepath)
             document = ProvDocument.deserialize(json_file)
             flattened = document.flattened()
             unified = flattened.unified()
             self.assertLess(len(unified.get_records()), len(flattened.get_records()))
Example 51
    def test_decoding_unicode_value(self):
        unicode_char = u'\u2019'
        json_content = u'''{
    "prefix": {
        "ex": "http://www.example.org"
    },
    "entity": {
        "ex:unicode_char": {
            "prov:label": "%s"
        }
    }
}''' % unicode_char

        prov_doc = ProvDocument.deserialize(content=json_content, format='json')
        e1 = prov_doc.get_record('ex:unicode_char')[0]
        self.assertIn(unicode_char, e1.get_attribute('prov:label'))
Example 52
    def test_decoding_unicode_value(self):
        unicode_char = u'\u2019'
        rdf_content = u'''
@prefix ex: <http://www.example.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

    ex:unicode_char a prov:Entity ;
        rdfs:label "%s"^^xsd:string .
''' % unicode_char
        prov_doc = ProvDocument.deserialize(content=rdf_content,
                                            format='rdf', rdf_format='turtle')
        e1 = prov_doc.get_record('ex:unicode_char')[0]
        self.assertIn(unicode_char, e1.get_attribute('prov:label'))
Example 53
    def assertRoundTripEquivalence(self, prov_doc, msg=None):
        if self.FORMAT is None:
            # This is a dummy test, just return
            return

        with io.BytesIO() as stream:
            prov_doc.serialize(destination=stream, format=self.FORMAT, indent=4)
            stream.seek(0, 0)

            prov_doc_new = ProvDocument.deserialize(source=stream, format=self.FORMAT)
            stream.seek(0, 0)
            # Assume UTF-8 encoding which is forced by the particular
            # PROV XML implementation and should also work for the PROV
            # JSON implementation.
            msg_extra = "'%s' serialization content:\n%s" % (self.FORMAT, stream.read().decode("utf-8"))
            msg = "\n".join((msg, msg_extra)) if msg else msg_extra
            self.assertEqual(prov_doc, prov_doc_new, msg)
Example 54
    def __init__(self, version, out_dir, zipped=True):
        out_dirname = os.path.basename(out_dir)
        out_path = os.path.dirname(out_dir)

        # Create output path from output name
        self.zipped = zipped
        if not self.zipped:
            out_dirname = out_dirname+".nidm"
        else:
            out_dirname = out_dirname+".nidm.zip"
        out_dir = os.path.join(out_path, out_dirname)

        # Quit if output path already exists and user doesn't want to overwrite
        # it
        if os.path.exists(out_dir):
            msg = out_dir+" already exists, overwrite?"
            if not input("%s (y/N) " % msg).lower() == 'y':
                quit("Bye.")
            if os.path.isdir(out_dir):
                shutil.rmtree(out_dir)
            else:
                os.remove(out_dir)
        self.out_dir = out_dir

        if version == "dev":
            self.version = {'major': 10000, 'minor': 0, 'revision': 0,
                            'num': version}
        else:
            major, minor, revision = version.split(".")
            if "-rc" in revision:
                revision, rc = revision.split("-rc")
            else:
                rc = -1
            self.version = {'major': int(major), 'minor': int(minor),
                            'revision': int(revision), 'rc': int(rc),
                            'num': version}

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()

        # A temp directory that will contain the exported data
        self.export_dir = tempfile.mkdtemp(prefix="nidm-", dir=out_path)

        self.prepend_path = ''
Example 55
    def generateProvlet(self, aDO, aRO):
        # create provlet
        d1 = ProvDocument()  # d1 is now an empty provenance document
        d1.add_namespace("dt", "http://cs.ncl.ac.uk/dtsim/")

        e1 = d1.entity(DTns + aRO.id)
        ag1 = d1.agent(DTns + str(aDO.id))
        d1.wasAttributedTo(e1, ag1)

        # update global graph
        e1 = pGlobal.entity(DTns + aRO.id)
        ag1 = pGlobal.agent(DTns + str(aDO.id))
        pGlobal.wasAttributedTo(e1, ag1)

        # 		self.notify(d1)
        return d1
    def setUp(self):
        self.export_dir = os.path.join(TEST_FOLDER, 'nidm')
        if not os.path.isdir(self.export_dir):
            os.mkdir(self.export_dir)

        # Retrieve owl file for NIDM-Results
        owl_file = os.path.join(TERM_RESULTS_DIR, 'nidm-results.owl')
        assert owl_file
        self.owl = OwlReader(owl_file)

        self.doc = ProvDocument()
        # self.bundle = ProvBundle(identifier=NIIRI[software_lc+'_results_id'])

        self.provn_file = os.path.join(self.export_dir, 'unit_test.provn')

        namespaces_file = os.path.join(TERM_RESULTS_DIR, "templates", \
            "Namespaces.txt")
        namespaces_fid = open(namespaces_file)
        self.prefixes = namespaces_fid.read()
        namespaces_fid.close()

        self.to_delete_files = [self.provn_file]
        self.gt_ttl_files = list()
def to_prov(obj, namespace, service):
    """
    :type obj: dict
    :rtype: prov.model.ProvDocument
    """
    g = ProvDocument()
    ap = Namespace('aip', 'https://araport.org/provenance/')

    g.add_namespace("dcterms", "http://purl.org/dc/terms/")
    g.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    vaughn = g.agent(ap['matthew_vaughn'], {
        'prov:type': PROV["Person"], 'foaf:givenName': "Matthew Vaughn",
        'foaf:mbox': "<mailto:[email protected]>"
    })
    # Hard coded for now
    walter = g.agent(ap['walter_moreira'], {
        'prov:type': PROV["Person"], 'foaf:givenName': "Walter Moreira",
        'foaf:mbox': "<mailto:[email protected]>"
    })
    utexas = g.agent(ap['university_of_texas'], {
        'prov:type': PROV["Organization"],
        'foaf:givenName': "University of Texas at Austin"
    })
    g.actedOnBehalfOf(walter, utexas)
    g.actedOnBehalfOf(vaughn, utexas)
    adama_platform = g.agent(
        ap['adama_platform'],
        {'dcterms:title': "ADAMA",
         'dcterms:description': "Araport Data And Microservices API",
         'dcterms:language': "en-US",
         'dcterms:identifier': "https://api.araport.org/community/v0.3/",
         'dcterms:updated': "2015-04-17T09:44:56"})
    g.wasGeneratedBy(adama_platform, walter)
    g.wasGeneratedBy(adama_platform, vaughn)

    iden = service_iden(namespace, service)
    srv = service_store[iden]['service']
    adama_microservice = g.agent(
        ap[iden],
        {'dcterms:title': srv.name.title(),
         'dcterms:description': srv.description,
         'dcterms:language': "en-US",
         'dcterms:identifier': api_url_for('service',
                                           namespace=namespace,
                                           service=service),
         'dcterms:source': srv.git_repository
         })

    g.used(adama_microservice, adama_platform, datetime.datetime.now())

    for author in getattr(srv, 'authors', []):
        try:
            author_name = author['name']
            author_email = author['email']
        except KeyError:
            raise APIException(
                'name and email are required in author field')
        author_agent = g.agent(
            ap[slugify(author_name)],
            {'prov:type': PROV['Person'],
             'foaf:givenName': author_name,
             'foaf:mbox': '<mailto:{}>'.format(author_email)})
        sponsor_name = author.get('sponsor_organization_name', None)
        if sponsor_name:
            sponsor_agent = g.agent(
                ap[slugify(sponsor_name)],
                {'prov:type': PROV['Organization'],
                 'foaf:givenName': sponsor_name,
                 'dcterms:identifier': author.get('sponsor_uri', '')})
            g.actedOnBehalfOf(author_agent, sponsor_agent)
        g.wasGeneratedBy(adama_microservice,
                         author_agent,
                         datetime.datetime.now())

    sources_entities = process_sources(srv.sources, g, ap)
    for src in sources_entities:
        g.used(adama_microservice, src, datetime.datetime.now())

    response = g.entity(ap['adama_response'])
    g.wasGeneratedBy(response, ap[srv.type], datetime.datetime.now())
    g.used(ap[srv.type], adama_microservice, datetime.datetime.now())

    return g
Example 58
def job2prov(job):
    """
    Create ProvDocument based on job description
    :param job: UWS job
    :return: ProvDocument
    """

    # job.jdl.content = {
    #     'description': description,
    #     'parameters': parameters,
    #     'results': results,
    #     'executionduration': execdur,
    #     'quote': quote
    # }
    # parameters[pname] = {
    #     'type': p.get('type'),
    #     'required': p.get('required'),
    #     'default': p.get('default'),
    #     'description': list(p)[0].text,
    # }
    # results[r.get('value')] = {
    #     'mediaType': r.get('mediaType'),
    #     'default': r.get('default'),
    #     'description': list(r)[0].text,
    # }

    pdoc = ProvDocument()
    # Declaring namespaces for various prefixes used in the example
    pdoc.add_namespace('prov', 'http://www.w3.org/ns/prov#')
    pdoc.add_namespace('voprov', 'http://www.ivoa.net/ns/voprov#')
    pdoc.add_namespace('cta', 'http://www.cta-observatory.org#')
    pdoc.add_namespace('uwsdata', 'https://voparis-uws-test.obspm.fr/rest/' + job.jobname + '/' + job.jobid + '/')
    pdoc.add_namespace('ctajobs', 'http://www.cta-observatory.org#')
    # Adding an activity
    ctbin = pdoc.activity('ctajobs:' + job.jobname, job.start_time, job.end_time)
    # TODO: add job description, version, url, ...
    # Agent
    pdoc.agent('cta:consortium', other_attributes={'prov:type': "Organization"})
    pdoc.wasAssociatedWith(ctbin, 'cta:consortium')
    # Entities, in and out with relations
    e_in = []
    for pname, pdict in job.jdl.content['parameters'].items():
        #if pname.startswith('in'):
        if any(x in pdict['type'] for x in ['file', 'xs:anyURI']):
            e_in.append(pdoc.entity('uwsdata:parameters/' + pname))
            # TODO: use publisher_did? add prov attributes, add voprov attributes?
            ctbin.used(e_in[-1])
    e_out = []
    for rname, rdict in job.jdl.content['results'].items():
        e_out.append(pdoc.entity('uwsdata:results/' + rname))
        # TODO: use publisher_did? add prov attributes, add voprov attributes?
        e_out[-1].wasGeneratedBy(ctbin)
        for e in e_in:
            e_out[-1].wasDerivedFrom(e)
    return pdoc