def save_provenance(prov_doc: ProvDocument, filepath: Path):
    logging.debug("Saving provenance files:")
    logging.debug(" - %s", filepath)
    with filepath.open("w") as f:
        prov_doc.serialize(f)
    provn_content = prov_doc.get_provn()
    filepath = filepath.with_suffix(".provn")
    logging.debug(" - %s", filepath)
    with filepath.open("w") as f:
        f.write(provn_content)
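
A minimal usage sketch (not from the original source): build a small ProvDocument and hand it to save_provenance(); the output filename chosen here is arbitrary.

# Hypothetical caller for the helper above
import logging
from pathlib import Path
from prov.model import ProvDocument

logging.basicConfig(level=logging.DEBUG)
doc = ProvDocument()
doc.add_namespace("ex", "http://example.org/")
doc.entity("ex:report")
save_provenance(doc, Path("report_prov.json"))  # also writes report_prov.provn alongside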
Example #2
class BioProvDocument:
    """
    Class containing base provenance information for a Project.
    """
    def __init__(
        self,
        project,
        add_attributes=False,
        add_users=True,
        _add_project_namespaces=True,
        _iter_samples=True,
        _iter_project=True,
    ):
        """
        Constructs the W3C-PROV document for a project.

        :param Project project: instance of bioprov.src.Project.
        :param bool add_attributes: whether to add object attributes.
        :param bool add_users: whether to add users and environments.
        :param bool _add_project_namespaces: whether to add Project namespaces to the document.
        :param bool _iter_samples: whether to create PROV records for each Sample.
        :param bool _iter_project: whether to create PROV records for the Project itself.
        """

        # Assert Project is good before constructing instance
        assert isinstance(project,
                          Project), Warnings()["incorrect_type"](project,
                                                                 Project)
        self.ProvDocument = ProvDocument()
        self.project = project
        self.project.document = self.ProvDocument
        self._dot = prov_to_dot(self.ProvDocument)
        self._provn = self.ProvDocument.get_provn()
        self._entities = dict()
        self._activities = dict()
        self._agents = dict()
        self._user_bundles = dict()
        self._provstore_document = None

        # Don't add attributes if you plan on exporting to graphic format
        self.add_attributes = add_attributes

        # Set this before running Namespaces
        if add_users:
            self._create_envs_and_users = True

        else:
            self._create_envs_and_users = False

        # Default actions to create the document
        if _add_project_namespaces:
            self._add_project_namespaces()

        if self._create_envs_and_users:
            self._iter_envs_and_users()

        if _iter_project:
            self._iter_project()

        if _iter_samples:
            self._iter_samples()

    def __repr__(self):
        return "BioProvDocument describing Project '{}' with {} samples.".format(
            self.project.tag, len(self.project))

    @property
    def dot(self):
        self._dot = prov_to_dot(self.ProvDocument)
        return self._dot

    @dot.setter
    def dot(self, value):
        self._dot = value

    @property
    def provn(self):
        self._provn = self.ProvDocument.get_provn()
        return self._provn

    @provn.setter
    def provn(self, value):
        self._provn = value

    @property
    def provstore_document(self):
        self._provstore_document = self.ProvDocument
        return self._provstore_document

    @provstore_document.setter
    def provstore_document(self, value):
        self._provstore_document = value

    def _add_project_namespaces(self):
        """
        Runs the three _add_namespace functions.
        :return:
        """
        self._add_project_namespace()
        if self._create_envs_and_users:
            self._add_env_and_user_namespace()
        self._add_samples_namespace()
        self._add_activities_namespace()

    def _add_project_namespace(self):
        """
        Creates the Project Namespace and Project Entity.
        # Sets the default Namespace of the BioProvDocument as the Project.

        :return: updates self.project and self.ProvDocument.
        """
        self.ProvDocument.add_namespace("project", str(self.project))

    def _add_env_and_user_namespace(self):
        self.ProvDocument.add_namespace(
            "users",
            f"Users associated with BioProv Project '{self.project.tag}'")

    def _add_samples_namespace(self):
        self.ProvDocument.add_namespace(
            "samples",
            f"Samples associated with bioprov Project '{self.project.tag}'",
        )

    def _add_files_namespace(self):
        self.ProvDocument.add_namespace(
            "files",
            f"Files associated with bioprov Project '{self.project.tag}'")

    def _iter_project(self):
        self._create_sample_bundle(self.project, kind="Project")
        self._create_sample_file_entities(self.project, kind="Project")
        self._create_program_entities(self.project, kind="Project")

    def _iter_envs_and_users(self):
        for _user, _env_dict in self.project.users.items():
            _user_preffix = f"users:{_user}"
            _user_bundle = self._user_bundles[
                _user] = self.ProvDocument.bundle(_user_preffix)
            _user_bundle.set_default_namespace(_user)
            _user_bundle.add_namespace(
                "envs", f"Environments associated with User '{_user}'")
            self._agents[_user] = _user_bundle.agent(_user_preffix)

    def _iter_samples(self):
        for _, sample in self.project.samples.items():
            # Run each PROV-building step for the sample, logging any that fail
            for fn in (
                    self._create_sample_bundle,
                    self._create_sample_file_entities,
                    self._create_program_entities,
            ):
                try:
                    fn(sample)
                except KeyError:
                    config.logger.debug(
                        f"Could not run function '{fn.__name__}' for sample {sample.name}."
                    )

    def _create_sample_bundle(self, object_, kind="Sample"):
        """
        Creates a ProvBundle for the Sample and associates it to self.ProvDocument.

        :param object_: instance of bioprov.Sample or bioprov.Project
        :param kind: 'Sample' or 'Project'
        :return: updates self.ProvDocument by creating PROV objects for the object.
        """
        choices = ("Sample", "Project")
        assert kind in choices, Warnings()["choices"](kind, choices, "kind")
        # Sample PROV attributes: bundle, namespace, entity
        object_.ProvBundle = self.ProvDocument.bundle(
            object_.namespace_preffix)
        object_.ProvBundle.set_default_namespace(object_.name)
        self._entities[
            object_.name] = object_.entity = object_.ProvBundle.entity(
                object_.namespace_preffix)
        if kind == "Sample":
            object_.ProvBundle.wasDerivedFrom(self._entities[object_.name],
                                              self.project.entity)

    def _create_sample_file_entities(self, sample, kind="Sample"):
        """
        Creates a ProvBundle for the Sample and associates it to self.ProvDocument.

        :param sample: instance of bioprov.Sample
        :return: updates the sample.ProvBundle by creating PROV objects for the files.

        """
        sample.files_namespace_preffix = "files"
        sample.file_namespace = sample.ProvBundle.add_namespace(
            sample.files_namespace_preffix,
            f"Files associated with {kind} {sample.name}",
        )
        # Files PROV attributes: namespace, entities
        for key, file in sample.files.items():
            # This prevents errors when the file refers to a project csv or JSON
            if file.name == sample.name:
                file.name = file.basename
            # Same call in both branches; the first additionally passes 'other_attributes'
            if self.add_attributes:
                self._entities[file.name] = sample.ProvBundle.entity(
                    f"{sample.files_namespace_preffix}:{file.tag}",
                    other_attributes=build_prov_attributes(
                        file.serializer(), sample.file_namespace),
                )
            else:
                self._entities[file.name] = sample.ProvBundle.entity(
                    f"{sample.files_namespace_preffix}:{file.tag}")

            # Adding relationships
            sample.ProvBundle.wasDerivedFrom(
                self._entities[file.name],
                self._entities[sample.name],
            )

    def _create_program_entities(self, sample, kind="Sample"):
        # Programs PROV attributes: namespace, entities
        programs_namespace_prefix = "programs"
        programs_namespace = sample.ProvBundle.add_namespace(
            programs_namespace_prefix,
            f"Programs associated with {kind} {sample.name}",
        )
        for key, program in sample.programs.items():
            last_run = program.runs[str(len(program.runs))]

            # We want to exclude _runs from the program serializer
            # So we put a custom serializer filter
            keys = ("sample", "_runs")
            serialized_program = serializer_filter(program, keys)
            try:
                del serialized_program["params"]
            except KeyError:
                pass

            # Same call in both branches; the first additionally passes 'other_attributes'
            if self.add_attributes:
                self._activities[program.name] = sample.ProvBundle.activity(
                    f"{programs_namespace_prefix}:{program.name}",
                    startTime=last_run.start_time,
                    endTime=last_run.end_time,
                    other_attributes=build_prov_attributes(
                        serialized_program, programs_namespace),
                )
            else:
                self._activities[program.name] = sample.ProvBundle.activity(
                    f"{programs_namespace_prefix}:{program.name}",
                    startTime=last_run.start_time,
                    endTime=last_run.end_time,
                )

            if self._create_envs_and_users:
                for _user, _env_dict in self.project.users.items():
                    _user_bundle = self._user_bundles[_user]
                    for _env_hash, _env in _env_dict.items():
                        if _env_hash == last_run.env:
                            if self.add_attributes:
                                self._agents[_env_hash] = _user_bundle.agent(
                                    f"envs:{_env}",
                                    other_attributes=build_prov_attributes(
                                        _env.env_dict, _env.env_namespace),
                                )
                            else:
                                self._agents[_env_hash] = _user_bundle.agent(
                                    f"envs:{_env}")
                            if not _env.actedOnBehalfOf:
                                _user_bundle.actedOnBehalfOf(
                                    self._agents[_env_hash],
                                    self._agents[_user])
                                _env.actedOnBehalfOf = True
                sample.ProvBundle.wasAssociatedWith(
                    self._activities[program.name], self._agents[last_run.env])

            inputs, outputs = self._get_IO_from_params(program)
            self._add_IO_relationships(sample, program, inputs, "input")
            self._add_IO_relationships(sample, program, outputs, "output")

    def _add_IO_relationships(self, sample, program, io_list, io_type):
        # TODO: replace Sample with Project when implementing Project.files and programs
        """
        Add PROV relationships between a Program and its input/output files.

        :param sample: instance of bioprov.Sample
        :param program: instance of bioprov.Program
        :param io_list: list of input/output files
        :param io_type: 'input' or 'output'
        :return: adds 'used' or 'wasGeneratedBy' relationships between the
            Program activity and the corresponding File entities.
        """

        # Small assertion block
        choices = ("input", "output")
        assert io_type in choices, Warnings()["choices"](io_type, choices,
                                                         "io_type")

        # Start function
        sample_files = [str(file) for _, file in sample.files.items()]
        for value in io_list:
            if value in sample_files:
                file_obj = [
                    file_ for _, file_ in sample.files.items()
                    if str(file_) == value
                ]
                if file_obj:
                    file_obj, *_ = file_obj
                    if io_type == "input":
                        sample.ProvBundle.used(
                            self._entities[file_obj.name],
                            self._activities[program.name],
                        )
                    elif io_type == "output":
                        sample.ProvBundle.wasGeneratedBy(
                            self._entities[file_obj.name],
                            self._activities[program.name],
                        )

    @staticmethod
    def _get_IO_from_params(program):
        """
        :param program: instance of bioprov.Program
        :return: list of input parameter values and list of output parameter values
        """
        # Relationships based on Parameters
        inputs, outputs = [], []

        for _, parameter in program.params.items():
            assert isinstance(parameter, Parameter), (
                Warnings()["incorrect_type"](parameter, Parameter) +
                "\nPlease check if Programs were correctly deserialized.")
            if parameter.kind == "input":
                # Some positional arguments may have an empty value; in that case the relevant value is stored in parameter.key
                if parameter.value:
                    inputs.append(parameter.value)
                else:
                    inputs.append(parameter.key)
            elif parameter.kind == "output":
                if parameter.value:
                    outputs.append(parameter.value)
                else:
                    outputs.append(parameter.key)

        return inputs, outputs

    def _add_activities_namespace(self):
        """
        Add activities Namespace to self.
        :return:
        """

        if len(self.ProvDocument.namespaces) == 0:
            self.ProvDocument.add_namespace(
                "activities",
                f"Activities associated with bioprov Project '{self.project.tag}'",
            )

    def upload_to_provstore(self, api=None):
        """
        Uploads self.ProvDocument to ProvStore (https://openprovenance.org/store/).

        :param api: provstore.api.Api
        :return: Sends POST request to ProvStore API and updates self.ProvDocument if successful.
        """
        if api is None:
            api = config.provstore_api
        try:
            self.provstore_document = api.document.create(
                self.ProvDocument, name=self.project.tag)
        except ConnectionError:
            logging.error(
                "Could not create remote document. Please check your internet connection and ProvStore credentials."
            )

    def write_provn(self, path=None):
        """
        Writes PROVN output of document.
        :param path: Path to write file.
        :return: Writes file.
        """
        if path is None:
            path = f"./{self.project.tag}_provn"
            if self.add_attributes:
                path += "_attrs"
            path += ".txt"

        path = Path(path)
        assert (
            path.parent.exists()
        ), f"Directory '{path.parent}' not found.\nPlease provide a valid directory."

        if path.exists():
            logging.info(f"Overwriting file at '{path}'")

        with open(path, "w") as f:
            f.write(self.provn)
            if path.exists():
                logging.info(f"Wrote PROVN record to {path}.")
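
A hypothetical usage sketch for BioProvDocument (assumes a bioprov Project instance named `project` is already loaded; its construction is not shown in this snippet):

# prov_doc = BioProvDocument(project, add_attributes=False)
# prov_doc.write_provn()                         # writes ./<project.tag>_provn.txt
# prov_doc.dot.write_png(f"{project.tag}.png")   # pydot graph built by prov_to_dot
# prov_doc.upload_to_provstore()                 # optional: POST the document to ProvStore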
class NIDMObjectsUnitTesting(unittest.TestCase):
    """
    Unit testing of NIDM objects (compared to examples provided in 
    nidm-results.owl)
    """

    def setUp(self):
        self.export_dir = os.path.join(TEST_FOLDER, 'nidm')
        if not os.path.isdir(self.export_dir):
            os.mkdir(self.export_dir)

        # Retrieve the OWL file for NIDM-Results
        owl_file = os.path.join(TERM_RESULTS_DIR, 'nidm-results.owl')
        assert owl_file
        self.owl = OwlReader(owl_file)

        self.doc = ProvDocument()
        # self.bundle = ProvBundle(identifier=NIIRI[software_lc+'_results_id'])

        self.provn_file = os.path.join(self.export_dir, 'unit_test.provn')

        namespaces_file = os.path.join(TERM_RESULTS_DIR, "templates", \
            "Namespaces.txt")
        namespaces_fid = open(namespaces_file)
        self.prefixes = namespaces_fid.read()
        namespaces_fid.close()

        self.to_delete_files = [self.provn_file]
        self.gt_ttl_files = list()

    def test_design_matrix(self):
        mat = np.matrix('1 2; 3 4')

        mat_image = os.path.join(os.path.dirname(TEST_FOLDER), "data", \
            "fmri.feat", "design.png")

        design_matrix = DesignMatrix(mat, mat_image, self.export_dir)
        self.doc.update(design_matrix.export())

        # In the FSL export the design matrix contains both the Design Matrix
        # entity and the Image entity representing the design matrix 
        # visualisation.
        self.to_delete_files.append(os.path.join(self.export_dir, \
            "DesignMatrix.csv"))
        self.to_delete_files.append(os.path.join(self.export_dir, \
            "DesignMatrix.png")) 

        gt_file = self.owl.get_example(NIDM['DesignMatrix'])
        self.gt_ttl_files = [os.path.join(TERM_RESULTS_DIR, \
            gt_file.replace("file://./", "")), 
            os.path.join(TERM_RESULTS_DIR, "examples", "Image-DesignMatrix.txt")]

        self._create_gt_and_compare("Design Matrix")

    def test_data(self):
        data = Data(grand_mean_scaling=True, target=100.0)
        self.doc.update(data.export())

        gt_file = self.owl.get_example(NIDM['Data'])
        self.gt_ttl_files.append(os.path.join(TERM_RESULTS_DIR, \
            gt_file.replace("file://./", "")))

        self._create_gt_and_compare("Data")

# INDEPEDENT_CORR = NIDM['IndependentError']
# SERIALLY_CORR = NIDM['SeriallyCorrelatedError']
# COMPOUND_SYMMETRY_CORR = NIDM['CompoundSymmetricError']
# ARBITRARILY_CORR = NIDM['ArbitriralyCorrelatedError']


    # def test_error_model_indepdt_global(self):
    #     error_distribution = GAUSSIAN_DISTRIBUTION
    #     variance_homo = True
    #     variance_spatial = SPATIALLY_GLOBAL
    #     dependance = INDEPEDENT_CORR
    #     dependance_spatial = SPATIALLY_GLOBAL

    #     error_model = ErrorModel(error_distribution, variance_homo, 
    #         variance_spatial, dependance, dependance_spatial)
    #     self.doc.update(error_model.export())

    #     nidm_classes = {
    #         "ErrorModel": dict(
    #             error_model_id="niiri:error_model_id",
    #             noise_distribution="nidm:GaussianDistribution",
    #             variance_homo="true",
    #             variance_spatial="nidm:SpatiallyGlobal",
    #             dependence="nidm:IndependentError",
    #             dependence_spatial="nidm:SpatiallyLocal"
    #         )
    #         }
    #     self._create_gt_and_compare(nidm_classes, "Data")

    def _create_gt_and_compare(self, class_name):
        # Write-out current example in a provn file and convert to turtle
        provn_fid = open(self.provn_file, 'w')
        provn_fid.write(self.doc.get_provn())
        provn_fid.close()

        ttl_file = self.provn_file.replace(".provn", ".ttl")

        call("provconvert -infile "+self.provn_file+" -outfile "+ttl_file, \
            shell=True)

        self.to_delete_files.append(ttl_file)

        # Load current example graph
        ex_graph = Graph()
        ex_graph.parse(source=ttl_file, format='turtle')
       
        # Read and concatenate ground truth files
        gt = ""
        for gt_ttl_file in self.gt_ttl_files:
            gt_fid = open(gt_ttl_file)
            # Paths described in the examples are relative in the export
            gt = gt + gt_fid.read().replace("/path/to/", "./")
            gt_fid.close()

        gt_graph = Graph()
        gt = self.prefixes+gt
        gt_graph.parse(data=gt, format='turtle')

        # Compare graphs
        found_diff = compare_graphs(ex_graph, gt_graph)

        if found_diff:
            raise Exception("Difference in "+class_name+".")

    def tearDown(self):
        # Delete files created for testing
        for to_delete_file in self.to_delete_files:
            if os.path.isfile(to_delete_file):
                os.remove(to_delete_file)

        os.rmdir(self.export_dir)
def example():

    g = ProvDocument()
    # Local namespace
    # Doesn't exist yet, so we are creating it
    ap = Namespace('aip', 'https://araport.org/provenance/')
    # Dublin Core
    g.add_namespace("dcterms", "http://purl.org/dc/terms/")
    # FOAF
    g.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    # Add sponsors and contributors as Agents
    # ap['matthew_vaughn']
    # aip:matthew_vaughn
    # https://araport.org/provenance/:matthew_vaughn
    # Learn this from a call to profiles service? Adds a dependency on Agave so I am open to figuring out another way
    me = g.agent(
        ap['matthew_vaughn'], {
            'prov:type': PROV["Person"],
            'foaf:givenName': "Matthew Vaughn",
            'foaf:mbox': "<mailto:[email protected]>"
        })
    # Hard coded for now
    walter = g.agent(
        ap['walter_moreira'], {
            'prov:type': PROV["Person"],
            'foaf:givenName': "Walter Moreira",
            'foaf:mbox': "<mailto:[email protected]>"
        })
    utexas = g.agent(
        ap['university_of_texas'], {
            'prov:type': PROV["Organization"],
            'foaf:givenName': "University of Texas at Austin"
        })

    # Set delegation to our host University
    # We may have trouble doing this for other users since we don't always capture their host institution
    g.actedOnBehalfOf(walter, utexas)
    g.actedOnBehalfOf(me, utexas)

    # Include the ADAMA platform as an Agent and set attribution
    # dcterms:title and dcterms:description are hardcoded
    # dcterms:language is hard-coded
    # dcterms:source is the URI of the public git source repository for ADAMA
    # "dcterms:updated": "2015-04-17T09:44:56" - this would actually be the date ADAMA was updated
    adama_platform = g.agent(
        ap['adama_platform'], {
            'dcterms:title': "ADAMA",
            'dcterms:description': "Araport Data and Microservices API",
            'dcterms:language': "en-US",
            'dcterms:identifier': "https://api.araport.org/community/v0.3/",
            'dcterms:updated': "2015-04-17T09:44:56"
        })
    g.wasGeneratedBy(adama_platform, walter)

    # Include the ADAMA microservice as an Agent and set attribution+delegation
    # dcterms:title and dcterms:description are inherited from the service's metadata
    # dcterms:language is hard-coded
    # dcterms:identifier is the deployment URI for the service
    # dcterms:source is the URI of the public git source repository. The URL in this example is just a dummy
    #
    # The name for each microservice should be unique. We've decided to
    # use the combination of namespace, service name, and version
    microservice_name = 'mwvaughn/bar_annotation_v1.0.0'
    adama_microservice = g.agent(
        ap[microservice_name], {
            'dcterms:title':
            "BAR Annotation Service",
            'dcterms:description':
            "Returns annotation from locus ID",
            'dcterms:language':
            "en-US",
            'dcterms:identifier':
            "https://api.araport.org/community/v0.3/mwvaughn/bar_annotation_v1.0.0",
            'dcterms:source':
            "https://github.com/Arabidopsis-Information-Portal/prov-enabled-api-sample"
        })

    # the microservice was generated by me on date X (don't use now, use when the service was updated)
    g.wasGeneratedBy(adama_microservice, me, datetime.datetime.now())
    # The microservice used the platform now
    g.used(adama_microservice, adama_platform, datetime.datetime.now())

    # Sources
    #
    # Define BAR
    # Agents
    nick = g.agent(
        ap['nicholas_provart'], {
            'prov:type': PROV["Person"],
            'foaf:givenName': "Nicholas Provart",
            'foaf:mbox': "*****@*****.**"
        })
    utoronto = g.agent(
        ap['university_of_toronto'], {
            'prov:type': PROV["Organization"],
            'foaf:givenName': "University of Toronto",
            'dcterms:identifier': "http://www.utoronto.ca/"
        })
    g.actedOnBehalfOf(nick, utoronto)

    # Entity
    # All fields derived from Sources.yml
    # dcterms:title and dcterms:description come straight from the YAML
    # dcterms:identifier - URI pointing to the source's canonical URI representation
    # optional - dcterms:language: Recommended best practice is to use a controlled vocabulary such as RFC 4646
    # optional - dcterms:updated: date the source was published or last updated
    # optional - dcterms:license: Simple string or URI to license. Validate URI if provided?
    datasource1 = g.entity(
        ap['datasource1'], {
            'dcterms:title': "BAR Arabidopsis AGI -> Annotation",
            'dcterms:description': "Most recent annotation for given AGI",
            'dcterms:language': "en-US",
            'dcterms:identifier':
            "http://bar.utoronto.ca/webservices/agiToAnnot.php",
            'dcterms:updated': "2015-04-17T09:44:56",
            'dcterms:license': "Creative Commons 3.0"
        })
    # Set up attribution to Nick
    g.wasAttributedTo(datasource1, nick)

    # Define TAIR
    # Agents
    # dcterms:language: Recommended best practice is to use a controlled vocabulary such as RFC 4646
    eva = g.agent(ap['eva_huala'], {
        'prov:type': PROV["Person"],
        'foaf:givenName': "Eva Huala"
    })
    phoenix = g.agent(
        ap['phoenix_bioinformatics'], {
            'prov:type': PROV["Organization"],
            'foaf:givenName': "Phoenix Bioinformatics"
        })
    g.actedOnBehalfOf(eva, phoenix)

    # Entity
    # All fields derived from Sources.yml
    # optional - dcterms:citation: Plain text bibliographic citation. If only provided as doi, should we try to validate it?
    datasource2 = g.entity(
        ap['datasource2'], {
            'dcterms:title':
            "TAIR",
            'dcterms:description':
            "The Arabidopsis Information Resource",
            'dcterms:language':
            "en-US",
            'dcterms:identifier':
            "https://www.arabidopsis.org/",
            'dcterms:citation':
            "The Arabidopsis Information Resource (TAIR): improved gene annotation and new tools. Nucleic Acids Research 2011 doi: 10.1093/nar/gkr1090"
        })
    g.wasAttributedTo(datasource2, eva)

    # In Sources.yml, these two sources are nested. Define that relationship here
    # There are other types of relationships but we will just use derived from for simplicity in this prototype
    g.wasDerivedFrom(ap['datasource1'], ap['datasource2'])

    # Depending on which ADAMA microservice type we are using, define an activity
    # Eventually, break these into more atomic actions in a chain
    action1 = g.activity(ap['do_query'], datetime.datetime.now())
    # action1 = g.activity(ap['do_map'], datetime.datetime.now())
    # action1 = g.activity(ap['do_generic'], datetime.datetime.now())
    # action1 = g.activity(ap['do_passthrough'], datetime.datetime.now())
    # Future... Support for ADAMA-native microservices
    # action1 = g.activity(ap['generate'], datetime.datetime.now())

    # Define current ADAMA response as an Entity
    # This is what's being returned to the user and is thus the subject of the PROV record
    # May be able to add more attributes to it but this is the minimum
    response = g.entity(ap['adama_response'])

    # Response is generated by the process_query action
    # Time-stamp it!
    g.wasGeneratedBy(response, ap['do_query'], datetime.datetime.now())
    # The process_query used the microservice
    g.used(ap['do_query'], adama_microservice, datetime.datetime.now())
    # The microservice used datasource1
    g.used(adama_microservice, datasource1, datetime.datetime.now())

    # Print prov_n
    print(g.get_provn())
    # Print prov-json
    print(g.serialize())
    # Write out as a pretty picture
    graph = prov.dot.prov_to_dot(g)
    graph.write_png('Sources.png')
Example #5
class NIDMExporter():
    """ 
    Generic class to parse a result directory to extract the pieces of 
    information to be stored in NIDM-Results and to generate a NIDM-Results 
    export.
    """
    def parse(self):
        """ 
        Parse a result directory to extract the pieces information to be 
        stored in NIDM-Results. 
        """
        # Methods _find_software, _find_model_fitting, _find_contrasts and
        # _find_inferences should be defined in the child classes and return
        # a list of NIDM Objects as specified in the objects module

        # Object of type Software describing the neuroimaging software package
        # used for the analysis
        self.software = self._find_software()

        # List of objects of type ModelFitting describing the model fitting
        # step in NIDM-Results (main activity: Model Parameters Estimation)
        self.model_fittings = self._find_model_fitting()

        # Dictionary of (key, value) pairs where key is a tuple
        # containing the identifier of a ModelParametersEstimation object and a
        # tuple of identifiers of ParameterEstimateMap objects and value is an
        # object of type Contrast describing the contrast estimation step in
        # NIDM-Results (main activity: Contrast Estimation)
        self.contrasts = self._find_contrasts()

        # Inference activity and entities
        # Dictionary of (key, value) pairs where key is the identifier of a
        # ContrastEstimation object and value is an object of type Inference
        # describing the inference step in NIDM-Results (main activity:
        # Inference)
        self.inferences = self._find_inferences()

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()

    def export(self):
        """ 
        Generate a NIDM-Results export. 
        """
        if not os.path.isdir(self.export_dir):
            os.mkdir(self.export_dir)

        # Initialise main bundle
        self._create_bundle(self.version)
        self.bundle.update(self.software.export())

        # Add model fitting steps
        for model_fitting in self.model_fittings:
            self.bundle.update(model_fitting.export())
            self.bundle.wasAssociatedWith(model_fitting.activity.id,
                                          self.software.id)

        # Add contrast estimation steps
        for (model_fitting_id, pe_ids), contrasts in self.contrasts.items():
            model_fitting = self._get_model_fitting(model_fitting_id)
            for contrast in contrasts:
                self.bundle.update(contrast.export())
                self.bundle.used(contrast.estimation.id,
                                 model_fitting.rms_map.id)
                self.bundle.used(contrast.estimation.id,
                                 model_fitting.mask_map.id)
                self.bundle.wasAssociatedWith(contrast.estimation.id,
                                              self.software.id)
                for pe_id in pe_ids:
                    self.bundle.used(contrast.estimation.id, pe_id)

        # Add inference steps
        for contrast_id, inferences in self.inferences.items():
            contrast = self._get_contrast(contrast_id)
            for inference in inferences:
                self.bundle.update(inference.export())
                if contrast.z_stat_map:
                    used_id = contrast.z_stat_map.id
                else:
                    used_id = contrast.stat_map.id
                self.bundle.used(inference.id, used_id)
                self.bundle.wasAssociatedWith(inference.id, self.software.id)

        # Write-out prov file
        self.save_prov_to_files()

        return self.export_dir

    def _get_model_fitting(self, mf_id):
        """ 
        Retrieve model fitting with identifier 'mf_id' from the list of model 
        fitting objects stored in self.model_fittings
        """
        for model_fitting in self.model_fittings:
            if model_fitting.activity.id == mf_id:
                return model_fitting
        raise Exception("Model fitting activity with id: "+str(mf_id)+\
            " not found.")

    def _get_contrast(self, con_id):
        """ 
        Retrieve contrast with identifier 'con_id' from the list of contrast 
        objects stored in self.contrasts
        """
        for contrasts in self.contrasts.values():
            for contrast in contrasts:
                if contrast.estimation.id == con_id:
                    return contrast
        raise Exception("Contrast activity with id: "+str(con_id)+\
            " not found.")

    def _add_namespaces(self):
        """ 
        Add namespaces to NIDM document. 
        """
        self.doc.add_namespace(NIDM)
        self.doc.add_namespace(NIIRI)
        self.doc.add_namespace(CRYPTO)
        self.doc.add_namespace(DCT)

    def _create_bundle(self, version):
        """ 
        Initialise NIDM-Results bundle.
        """
        software_lc = self.software.name.lower()
        software_uc = self.software.name.upper()

        bundle_id = NIIRI[str(uuid.uuid4())]
        self.bundle = ProvBundle(identifier=bundle_id)

        self.doc.entity(
            bundle_id,
            other_attributes=(
                (PROV['type'], PROV['Bundle']),
                (PROV['label'], software_uc + " Results"),
                (NIDM['objectModel'], NIDM[software_uc + 'Results']),
                (NIDM['version'], version),
            ))

        self.doc.wasGeneratedBy(NIIRI[software_lc + '_results_id'],
                                time=str(datetime.datetime.now().time()))

    def _get_model_parameters_estimations(self, error_model):
        """
        Infer model estimation method from the 'error_model'. Return an object
        of type ModelParametersEstimation.
        """
        if error_model.dependance == INDEPEDENT_CORR:
            if error_model.variance_homo:
                estimation_method = ESTIMATION_OLS
            else:
                estimation_method = ESTIMATION_WLS
        else:
            estimation_method = ESTIMATION_GLS

        mpe = ModelParametersEstimation(estimation_method, self.software.id)

        return mpe

    def save_prov_to_files(self, showattributes=False):
        """
        Write out the PROV-N serialisation to nidm.provn.
        """
        self.doc.add_bundle(self.bundle)
        with open(os.path.join(self.export_dir, 'nidm.provn'), 'w') as provn_fid:
            provn_fid.write(self.doc.get_provn(4))
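
A hypothetical subclass skeleton (not from the source) showing how the _find_* hooks of NIDMExporter are expected to be implemented before calling parse() and export(); the constructor and its arguments are assumptions:

# class MyToolExporter(NIDMExporter):
#     def __init__(self, export_dir, version):
#         self.export_dir = export_dir
#         self.version = version
#     def _find_software(self): ...       # return a Software object
#     def _find_model_fitting(self): ...  # return a list of ModelFitting objects
#     def _find_contrasts(self): ...      # return {(mpe_id, pe_ids): [Contrast, ...]}
#     def _find_inferences(self): ...     # return {contrast_estimation_id: [Inference, ...]}
#
# exporter = MyToolExporter("nidm_export", "1.3.0")
# exporter.parse()    # builds self.doc and collects the NIDM objects
# exporter.export()   # writes nidm.provn into the export directory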
def get_blank_prov_document():
    return ProvDocument(namespaces=all_namespaces)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description=
        "Process and generate provenance for a MIMIC patient admission")
    parser.add_argument("admission_id",
                        type=int,
                        help="The ID of admission to process")
    args = parser.parse_args()

    prov_doc1 = ProvDocument(namespaces=all_namespaces)
    admission = Admission(prov_doc1, args.admission_id)
    admission.process()

    filepath = output_path / f"{args.admission_id}.json"
    with filepath.open("w") as f:
        prov_doc1.serialize(f)
    provn_content = prov_doc1.get_provn()
    print(provn_content)
    with filepath.with_suffix(".provn").open("w") as f:
        f.write(provn_content)

    dot = prov_to_dot(prov_doc1)
    dot.write_pdf(filepath.with_suffix(".pdf"))
    db.close_session()
Example #8
def get_prov():
    doc = ProvDocument()
    doc.set_default_namespace('http://roocs.org/')
    return doc.get_provn()


# Orphaned fragment of a plate-provenance helper whose definition is truncated in this snippet:
#     logpage_ident = get_logpage(str(logpage), prov_doc)
#     return plate_ident


# Create a new provenance document
d1 = ProvDocument()
declare_namespaces(d1)
# get V468Cyg
# get_plate
# process = get_process('2180', d1)
try:
    # scan = get_entity('2462','scan', d1)
    id = '2180'
    prov_type = 'lightcurve'
    # plate_name = get_entity(id,prov_type, d1)
    # process_name = get_process('9804',d1)
    # logpage_name = get_logpage('10085',d1)
    # source_id = get_source('40000001', d1)
    # plate_ident = get_plate_prov(id, d1)
    id = get_lightcurve('614-089373', d1)
except TypeError:
    print('the job is still executing...')

print(d1.get_provn())

filename = 'prov_' + prov_type + id
d1.serialize(filename + '.xml', format='xml')
dot = prov_to_dot(d1)
dot.write_png(filename + '.png')