def save_provenance(prov_doc: ProvDocument, filepath: Path):
    """Serialize *prov_doc* to *filepath*, then write a PROV-N copy alongside it.

    :param prov_doc: the provenance document to persist.
    :param filepath: destination for the primary serialization; the PROV-N
        twin is written next to it with a ``.provn`` suffix.
    """
    logging.debug("Saving provenance files:")
    logging.debug(" - %s", filepath)
    # Primary serialization (format chosen by prov from the open text handle).
    with filepath.open("w") as serialized_out:
        prov_doc.serialize(serialized_out)
    # Human-readable PROV-N companion file.
    provn_text = prov_doc.get_provn()
    provn_path = filepath.with_suffix(".provn")
    logging.debug(" - %s", provn_path)
    with provn_path.open("w") as provn_out:
        provn_out.write(provn_text)
class BioProvDocument:
    """
    Class containing base provenance information for a Project.

    Wraps a prov.ProvDocument and populates it with entities, activities and
    agents derived from a bioprov Project, its Samples, Files and Programs.
    """

    def __init__(
        self,
        project,
        add_attributes=False,
        add_users=True,
        _add_project_namespaces=True,
        _iter_samples=True,
        _iter_project=True,
    ):
        """
        Constructs the W3C-PROV document for a project.

        :param Project project: instance of bioprov.src.Project.
        :param bool add_attributes: whether to add object attributes.
        :param bool add_users: whether to add users and environments.
        :param bool _add_project_namespaces: run the namespace-creation steps.
        :param bool _iter_samples: build PROV records for each sample.
        :param bool _iter_project: build PROV records for the project itself.
        """

        # Assert Project is good before constructing instance
        assert isinstance(project, Project), Warnings()["incorrect_type"](project, Project)
        self.ProvDocument = ProvDocument()
        self.project = project
        # Back-reference so the Project can reach its PROV document.
        self.project.document = self.ProvDocument
        # Cached renderings; refreshed lazily by the 'dot' and 'provn' properties.
        self._dot = prov_to_dot(self.ProvDocument)
        self._provn = self.ProvDocument.get_provn()
        # Registries of PROV records keyed by object name / hash.
        self._entities = dict()
        self._activities = dict()
        self._agents = dict()
        self._user_bundles = dict()
        self._provstore_document = None

        # Don't add attributes if you plan on exporting to graphic format
        self.add_attributes = add_attributes

        # Set this before running Namespaces
        if add_users:
            self._create_envs_and_users = True
        else:
            self._create_envs_and_users = False

        # Default actions to create the document
        if _add_project_namespaces:
            self._add_project_namespaces()
        if self._create_envs_and_users:
            self._iter_envs_and_users()
        if _iter_project:
            self._iter_project()
        if _iter_samples:
            self._iter_samples()

    def __repr__(self):
        return "BioProvDocument describing Project '{}' with {} samples.".format(
            self.project.tag, len(self.project))

    @property
    def dot(self):
        # Re-render on every access so the graph reflects the current document.
        self._dot = prov_to_dot(self.ProvDocument)
        return self._dot

    @dot.setter
    def dot(self, value):
        self._dot = value

    @property
    def provn(self):
        # Re-serialize on every access so the text reflects the current document.
        self._provn = self.ProvDocument.get_provn()
        return self._provn

    @provn.setter
    def provn(self, value):
        self._provn = value

    @property
    def provstore_document(self):
        # NOTE(review): this getter overwrites _provstore_document with the
        # local ProvDocument, discarding whatever the setter stored (e.g. the
        # remote document created in upload_to_provstore) — confirm intended.
        self._provstore_document = self.ProvDocument
        return self._provstore_document

    @provstore_document.setter
    def provstore_document(self, value):
        self._provstore_document = value

    def _add_project_namespaces(self):
        """
        Runs the three _add_namespace functions.
        :return:
        """
        self._add_project_namespace()
        if self._create_envs_and_users:
            self._add_env_and_user_namespace()
        self._add_samples_namespace()
        self._add_activities_namespace()

    def _add_project_namespace(self):
        """
        Creates the Project Namespace and Project Entity.

        # Sets the default Namespace of the BioProvDocument as the Project.

        :return: updates self.project and self.ProvDocument.
        """
        self.ProvDocument.add_namespace("project", str(self.project))

    def _add_env_and_user_namespace(self):
        # Namespace grouping all user agents of this project.
        self.ProvDocument.add_namespace(
            "users", f"Users associated with BioProv Project '{self.project.tag}'")

    def _add_samples_namespace(self):
        # Namespace grouping all sample entities of this project.
        self.ProvDocument.add_namespace(
            "samples",
            f"Samples associated with bioprov Project '{self.project.tag}'",
        )

    def _add_files_namespace(self):
        # NOTE(review): not called from within this class as shown here;
        # per-sample file namespaces are added in _create_sample_file_entities.
        self.ProvDocument.add_namespace(
            "files", f"Files associated with bioprov Project '{self.project.tag}'")

    def _iter_project(self):
        # Build the project-level bundle, file entities and program activities.
        self._create_sample_bundle(self.project, kind="Project")
        self._create_sample_file_entities(self.project, kind="Project")
        self._create_program_entities(self.project, kind="Project")

    def _iter_envs_and_users(self):
        # One bundle and one agent per user; each bundle carries an 'envs'
        # namespace for that user's recorded environments.
        for _user, _env_dict in self.project.users.items():
            _user_preffix = f"users:{_user}"
            _user_bundle = self._user_bundles[_user] = self.ProvDocument.bundle(_user_preffix)
            _user_bundle.set_default_namespace(_user)
            _user_bundle.add_namespace(
                "envs", f"Environments associated with User '{_user}'")
            self._agents[_user] = _user_bundle.agent(_user_preffix)

    def _iter_samples(self):
        # Build bundle, file entities and program activities for each sample.
        for _, sample in self.project.samples.items():
            # NOTE(review): the three calls execute while the tuple below is
            # being constructed, i.e. BEFORE the try block is entered — a
            # KeyError raised by them would NOT be caught here. Confirm
            # whether the intent was to defer the calls (e.g. via lambdas).
            for statement in (
                self._create_sample_bundle(sample),
                self._create_sample_file_entities(sample),
                self._create_program_entities(sample),
            ):
                try:
                    statement
                except KeyError:
                    config.logger.debug(
                        f"Could not run function '{statement.__name__}' for sample {sample.name}."
                    )
                    pass

    def _create_sample_bundle(self, object_, kind="Sample"):
        """
        Creates a ProvBundle for the Sample and associates it to self.ProvDocument.

        :param object_: instance of bioprov.Sample
        :param kind: 'Sample' or 'Project'; Projects skip the derivation link.
        :return: updates self.ProvDocument by creating PROV objects for the sample.
        """
        choices = ("Sample", "Project")
        assert kind in choices, Warnings()["choices"](kind, choices, "kind")
        # Sample PROV attributes: bundle, namespace, entity
        object_.ProvBundle = self.ProvDocument.bundle(object_.namespace_preffix)
        object_.ProvBundle.set_default_namespace(object_.name)
        self._entities[object_.name] = object_.entity = object_.ProvBundle.entity(
            object_.namespace_preffix)
        # A Sample derives from its Project entity; the Project itself does not.
        if kind == "Sample":
            object_.ProvBundle.wasDerivedFrom(self._entities[object_.name],
                                              self.project.entity)

    def _create_sample_file_entities(self, sample, kind="Sample"):
        """
        Creates a ProvBundle for the Sample and associates it to self.ProvDocument.

        :param sample: instance of bioprov.Sample
        :param kind: 'Sample' or 'Project'; used only in the namespace label.
        :return: updates the sample.ProvBundle by creating PROV objects for the files.
        """
        sample.files_namespace_preffix = "files"
        sample.file_namespace = sample.ProvBundle.add_namespace(
            sample.files_namespace_preffix,
            f"Files associated with {kind} {sample.name}",
        )
        # Files PROV attributes: namespace, entities
        for key, file in sample.files.items():
            # This prevents errors when the file refers to a project csv or JSON
            if file.name == sample.name:
                file.name = file.basename
            # Same function call, but in the first we pass the 'other_attributes' argument
            if self.add_attributes:
                self._entities[file.name] = sample.ProvBundle.entity(
                    f"{sample.files_namespace_preffix}:{file.tag}",
                    other_attributes=build_prov_attributes(
                        file.serializer(), sample.file_namespace),
                )
            else:
                self._entities[file.name] = sample.ProvBundle.entity(
                    f"{sample.files_namespace_preffix}:{file.tag}",
                )
            # Adding relationships
            sample.ProvBundle.wasDerivedFrom(
                self._entities[file.name],
                self._entities[sample.name],
            )

    def _create_program_entities(self, sample, kind="Sample"):
        """Create PROV activities for each Program run on *sample* and link
        them to environments, users, and input/output files."""
        # Programs PROV attributes: namespace, entities
        programs_namespace_prefix = f"programs"
        programs_namespace = sample.ProvBundle.add_namespace(
            programs_namespace_prefix,
            f"Programs associated with {kind} {sample.name}",
        )
        for key, program in sample.programs.items():
            # Runs are keyed by 1-based string indices; take the latest run.
            last_run = program.runs[str(len(program.runs))]
            # We want to exclude _runs from the program serializer
            # So we put a custom serializer filter
            keys = ("sample", "_runs")
            serialized_program = serializer_filter(program, keys)
            try:
                del serialized_program["params"]
            except KeyError:
                pass
            # Same function call, but in the first we pass the 'other_attributes' argument
            if self.add_attributes:
                self._activities[program.name] = sample.ProvBundle.activity(
                    f"{programs_namespace_prefix}:{program.name}",
                    startTime=last_run.start_time,
                    endTime=last_run.end_time,
                    other_attributes=build_prov_attributes(
                        serialized_program, programs_namespace),
                )
            else:
                self._activities[program.name] = sample.ProvBundle.activity(
                    f"{programs_namespace_prefix}:{program.name}",
                    startTime=last_run.start_time,
                    endTime=last_run.end_time,
                )
            if self._create_envs_and_users:
                # Find the environment agent matching the last run and link it
                # to its user via actedOnBehalfOf (only once per environment).
                for _user, _env_dict in self.project.users.items():
                    _user_bundle = self._user_bundles[_user]
                    for _env_hash, _env in _env_dict.items():
                        if _env_hash == last_run.env:
                            if self.add_attributes:
                                self._agents[_env_hash] = _user_bundle.agent(
                                    f"envs:{_env}",
                                    other_attributes=build_prov_attributes(
                                        _env.env_dict, _env.env_namespace),
                                )
                            else:
                                self._agents[_env_hash] = _user_bundle.agent(
                                    f"envs:{_env}")
                            # _env.actedOnBehalfOf flags that the delegation
                            # record was already written for this environment.
                            if not _env.actedOnBehalfOf:
                                _user_bundle.actedOnBehalfOf(
                                    self._agents[_env_hash], self._agents[_user])
                                _env.actedOnBehalfOf = True
                sample.ProvBundle.wasAssociatedWith(
                    self._activities[program.name], self._agents[last_run.env])
            # Link the activity to the files it consumed and produced.
            inputs, outputs = self._get_IO_from_params(program)
            self._add_IO_relationships(sample, program, inputs, "input")
            self._add_IO_relationships(sample, program, outputs, "output")

    def _add_IO_relationships(self, sample, program, io_list, io_type):
        # TODO: replace Sample for Project when implementing Project.files and programs
        """
        Add PROV relationships between Program and input/output files.

        :param sample: instance of bioprov.Sample
        :param program: instance of bioprov.Program
        :param io_list: list of input/output files
        :param io_type: 'input' or 'output'
        :return: Adds relationship between
        """
        # Small assertion block
        choices = ("input", "output")
        assert io_type in choices, Warnings()["choices"](io_type, choices, "io_type")
        # Start function
        sample_files = [str(file) for _, file in sample.files.items()]
        for value in io_list:
            if value in sample_files:
                # Map the parameter value back to the matching File object.
                file_obj = [
                    file_ for _, file_ in sample.files.items()
                    if str(file_) == value
                ]
                if file_obj:
                    file_obj, *_ = file_obj
                    if io_type == "input":
                        sample.ProvBundle.used(
                            self._entities[file_obj.name],
                            self._activities[program.name],
                        )
                    elif io_type == "output":
                        sample.ProvBundle.wasGeneratedBy(
                            self._entities[file_obj.name],
                            self._activities[program.name],
                        )

    @staticmethod
    def _get_IO_from_params(program):
        """
        :param program: instance of bioprov.Program
        :return: list of input parameter values and list of output parameter values
        """
        # Relationships based on Parameters
        inputs, outputs = [], []
        for _, parameter in program.params.items():
            assert isinstance(parameter, Parameter), (
                Warnings()["incorrect_type"](parameter, Parameter)
                + "\nPlease check if Programs were correctly deserialized.")
            if parameter.kind == "input":
                # This loop is because some positional arguments may have empty values (value stored in parameter.key)
                if parameter.value:
                    inputs.append(parameter.value)
                else:
                    inputs.append(parameter.key)
            elif parameter.kind == "output":
                if parameter.value:
                    outputs.append(parameter.value)
                else:
                    outputs.append(parameter.key)
        return inputs, outputs

    def _add_activities_namespace(self):
        """
        Add activities Namespace to self.
        :return:
        """
        # NOTE(review): by the time this runs, _add_project_namespace has
        # already added a namespace, so this guard is never true and the
        # 'activities' namespace is never added — confirm intended.
        if len(self.ProvDocument.namespaces) == 0:
            self.ProvDocument.add_namespace(
                "activities",
                f"Activities associated with bioprov Project '{self.project.tag}'",
            )

    def upload_to_provstore(self, api=None):
        """
        Uploads self.ProvDocument to ProvStore (https://openprovenance.org/store/)

        :param api: provstore.api.Api
        :return: Sends POST request to ProvStore API and updates self.ProvDocument if successful.
        """
        if api is None:
            api = config.provstore_api
        try:
            self.provstore_document = api.document.create(
                self.ProvDocument, name=self.project.tag)
        except ConnectionError:
            logging.error(
                "Could not create remote document. Please check your internet connection and ProvStore credentials."
            )

    def write_provn(self, path=None):
        """
        Writes PROVN output of document.

        :param path: Path to write file.
        :return: Writes file.
        """
        # Default file name encodes the project tag and attribute setting.
        if path is None:
            path = f"./{self.project.tag}_provn"
            if self.add_attributes:
                path += "_attrs"
            path += ".txt"
        path = Path(path)
        assert (
            path.parent.exists()
        ), f"Directory '{path.parent}' not found.\nPlease provide a valid directory."
        if path.exists():
            logging.info(f"Overwriting file at '{path}'")
        with open(path, "w") as f:
            f.write(self.provn)
        if path.exists():
            logging.info(f"Wrote PROVN record to {path}.")
class NIDMObjectsUnitTesting(unittest.TestCase):
    """
    Unit testing of NIDM objects (compared to examples provided in
    nidm-results.owl)
    """

    def setUp(self):
        # Export directory for files generated during the tests.
        self.export_dir = os.path.join(TEST_FOLDER, 'nidm')
        if not os.path.isdir(self.export_dir):
            os.mkdir(self.export_dir)
        # Retreive owl file for NIDM-Results
        owl_file = os.path.join(TERM_RESULTS_DIR, 'nidm-results.owl')
        assert owl_file
        self.owl = OwlReader(owl_file)
        self.doc = ProvDocument()
        # self.bundle = ProvBundle(identifier=NIIRI[software_lc+'_results_id'])
        self.provn_file = os.path.join(self.export_dir, 'unit_test.provn')
        # Namespace prefixes prepended to every ground-truth turtle graph.
        namespaces_file = os.path.join(TERM_RESULTS_DIR, "templates",
                                       "Namespaces.txt")
        namespaces_fid = open(namespaces_file)
        self.prefixes = namespaces_fid.read()
        namespaces_fid.close()
        # Files scheduled for deletion in tearDown.
        self.to_delete_files = [self.provn_file]
        self.gt_ttl_files = list()

    def test_design_matrix(self):
        """Export a DesignMatrix and compare it with the owl example."""
        mat = np.matrix('1 2; 3 4')
        mat_image = os.path.join(os.path.dirname(TEST_FOLDER), "data",
                                 "fmri.feat", "design.png")
        design_matrix = DesignMatrix(mat, mat_image, self.export_dir)
        self.doc.update(design_matrix.export())
        # In the FSL export the design matrix contains both the Design Matrix
        # entity and the Image entity representing the design matrix
        # visualisation.
        self.to_delete_files.append(os.path.join(self.export_dir,
                                                 "DesignMatrix.csv"))
        self.to_delete_files.append(os.path.join(self.export_dir,
                                                 "DesignMatrix.png"))
        gt_file = self.owl.get_example(NIDM['DesignMatrix'])
        self.gt_ttl_files = [os.path.join(TERM_RESULTS_DIR,
                                          gt_file.replace("file://./", "")),
                             os.path.join(TERM_RESULTS_DIR, "examples",
                                          "Image-DesignMatrix.txt")]
        self._create_gt_and_compare("Design Matrix")

    def test_data(self):
        """Export a Data entity and compare it with the owl example."""
        data = Data(grand_mean_scaling=True, target=100.0)
        self.doc.update(data.export())
        gt_file = self.owl.get_example(NIDM['Data'])
        self.gt_ttl_files.append(os.path.join(TERM_RESULTS_DIR,
                                              gt_file.replace("file://./", "")))
        self._create_gt_and_compare("Data")

    # INDEPEDENT_CORR = NIDM['IndependentError']
    # SERIALLY_CORR = NIDM['SeriallyCorrelatedError']
    # COMPOUND_SYMMETRY_CORR = NIDM['CompoundSymmetricError']
    # ARBITRARILY_CORR = NIDM['ArbitriralyCorrelatedError']

    # def test_error_model_indepdt_global(self):
    #     error_distribution = GAUSSIAN_DISTRIBUTION
    #     variance_homo = True
    #     variance_spatial = SPATIALLY_GLOBAL
    #     dependance = INDEPEDENT_CORR
    #     dependance_spatial = SPATIALLY_GLOBAL
    #     error_model = ErrorModel(error_distribution, variance_homo,
    #                              variance_spatial, dependance,
    #                              dependance_spatial)
    #     self.doc.update(error_model.export())
    #     nidm_classes = {
    #         "ErrorModel": dict(
    #             error_model_id="niiri:error_model_id",
    #             noise_distribution="nidm:GaussianDistribution",
    #             variance_homo="true",
    #             variance_spatial="nidm:SpatiallyGlobal",
    #             dependence="nidm:IndependentError",
    #             dependence_spatial="nidm:SpatiallyLocal"
    #         )
    #     }
    #     self._create_gt_and_compare(nidm_classes, "Data")

    def _create_gt_and_compare(self, class_name):
        """Serialize self.doc to turtle and diff it against the concatenated
        ground-truth graphs; raise if the graphs differ."""
        # Write-out current example in a provn file and convert to turtle
        provn_fid = open(self.provn_file, 'w')
        provn_fid.write(self.doc.get_provn())
        provn_fid.close()
        ttl_file = self.provn_file.replace(".provn", ".ttl")
        # External 'provconvert' tool performs the PROV-N -> turtle conversion.
        call("provconvert -infile "+self.provn_file+" -outfile "+ttl_file,
             shell=True)
        self.to_delete_files.append(ttl_file)
        # Load current example graph
        ex_graph = Graph()
        ex_graph.parse(source=ttl_file, format='turtle')
        # Read and concatenate ground truth files
        gt = ""
        for gt_ttl_file in self.gt_ttl_files:
            gt_fid = open(gt_ttl_file)
            # What is described in the examples to be at any path is relative
            # in export
            # NOTE(review): the replace runs on the text accumulated SO FAR,
            # before the current file is appended — the last file's
            # "/path/to/" occurrences are never rewritten. Confirm intended.
            gt = gt.replace("/path/to/", "./")
            gt = gt+gt_fid.read()
            gt_fid.close()
        gt_graph = Graph()
        gt = self.prefixes+gt
        gt_graph.parse(data=gt, format='turtle')
        # Compare graphs
        found_diff = compare_graphs(ex_graph, gt_graph)
        if found_diff:
            raise Exception("Difference in "+class_name+".")

    def tearDown(self):
        # Delete files created for testing
        for to_delete_file in self.to_delete_files:
            if os.path.isfile(to_delete_file):
                os.remove(to_delete_file)
        os.rmdir(self.export_dir)
def example():
    """Build a sample ADAMA provenance document, print its PROV-N and
    PROV-JSON serializations, and render it to Sources.png."""
    g = ProvDocument()
    # Local namespace
    # Doesnt exist yet so we are creating it
    ap = Namespace('aip', 'https://araport.org/provenance/')
    # Dublin Core
    g.add_namespace("dcterms", "http://purl.org/dc/terms/")
    # FOAF
    g.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    # Add sponsors and contributors as Agents
    # ap['matthew_vaughn']
    # aip:matthew_vaughn
    # https://araport.org/provenance/:matthew_vaughn
    # Learn this from a call to profiles service? Adds a dependency on Agave
    # so I am open to figuring out another way
    me = g.agent(
        ap['matthew_vaughn'], {
            'prov:type': PROV["Person"],
            'foaf:givenName': "Matthew Vaughn",
            'foaf:mbox': "<mailto:[email protected]>"
        })
    # Hard coded for now
    walter = g.agent(
        ap['walter_moreira'], {
            'prov:type': PROV["Person"],
            'foaf:givenName': "Walter Moreira",
            'foaf:mbox': "<mailto:[email protected]>"
        })
    utexas = g.agent(
        ap['university_of_texas'], {
            'prov:type': PROV["Organization"],
            'foaf:givenName': "University of Texas at Austin"
        })

    # Set delegation to our host University
    # We may have trouble doing this for other users since we don't always
    # capture their host instituion
    g.actedOnBehalfOf(walter, utexas)
    g.actedOnBehalfOf(me, utexas)

    # Include the ADAMA platform as an Agent and set attribution
    # dcterms:title and dcterms:description are hardcoded
    # dcterms:language is hard-coded
    # dcterms:source is the URI of the public git source repository for ADAMA
    # "dcterms:updated": "2015-04-17T09:44:56" - this would actually be the
    # date ADAMA was updated
    adama_platform = g.agent(
        ap['adama_platform'], {
            'dcterms:title': "ADAMA",
            'dcterms:description': "Araport Data and Microservices API",
            'dcterms:language': "en-US",
            'dcterms:identifier': "https://api.araport.org/community/v0.3/",
            'dcterms:updated': "2015-04-17T09:44:56"
        })
    g.wasGeneratedBy(adama_platform, walter)

    # Include the ADAMA microservice as an Agent and set attribution+delegation
    # dcterms:title and dcterms:description are inherited from the service's
    # metadata
    # dcterms:language is hard-coded
    # dcterms:identifier is the deployment URI for the service
    # dcterms:source is the URI of the public git source repository. The URL
    # in this example is just a dummy
    #
    # The name for each microservice should be unique. We've decided to
    # use the combination of namespace, service name, and version
    microservice_name = 'mwvaughn/bar_annotation_v1.0.0'
    adama_microservice = g.agent(
        ap[microservice_name], {
            'dcterms:title': "BAR Annotation Service",
            'dcterms:description': "Returns annotation from locus ID",
            'dcterms:language': "en-US",
            'dcterms:identifier': "https://api.araport.org/community/v0.3/mwvaughn/bar_annotation_v1.0.0",
            'dcterms:source': "https://github.com/Arabidopsis-Information-Portal/prov-enabled-api-sample"
        })

    # the microservice was generated by me on date X (don't use now, use when
    # the service was updated)
    g.wasGeneratedBy(adama_microservice, me, datetime.datetime.now())
    # The microservice used the platform now
    g.used(adama_microservice, adama_platform, datetime.datetime.now())

    # Sources
    #
    # Define BAR
    # Agents
    nick = g.agent(
        ap['nicholas_provart'], {
            'prov:type': PROV["Person"],
            'foaf:givenName': "Nicholas Provart",
            'foaf:mbox': "*****@*****.**"
        })
    utoronto = g.agent(
        ap['university_of_toronto'], {
            'prov:type': PROV["Organization"],
            'foaf:givenName': "University of Toronto",
            'dcterms:identifier': "http://www.utoronto.ca/"
        })
    g.actedOnBehalfOf(nick, utoronto)

    # Entity
    # All fields derived from Sources.yml
    # dcterms:title and dcterms:description come straight from the YAML
    # dcterms:identifier - URI pointing to the source's canonical URI
    # representation
    # optional - dcterms:language: Recommended best practice is to use a
    # controlled vocabulary such as RFC 4646
    # optional - dcterms:updated: date the source was published or last updated
    # optional - dcterms:license: Simple string or URI to license. Validate
    # URI if provided?
    datasource1 = g.entity(
        ap['datasource1'], {
            'dcterms:title': "BAR Arabidopsis AGI -> Annotation",
            'dcterms:description': "Most recent annotation for given AGI",
            'dcterms:language': "en-US",
            'dcterms:identifier': "http://bar.utoronto.ca/webservices/agiToAnnot.php",
            'dcterms:updated': "2015-04-17T09:44:56",
            'dcterms:license': "Creative Commons 3.0"
        })
    # Set up attribution to Nick
    g.wasAttributedTo(datasource1, nick)

    # Define TAIR
    # Agents
    # dcterms:language: Recommended best practice is to use a controlled
    # vocabulary such as RFC 4646
    eva = g.agent(ap['eva_huala'], {
        'prov:type': PROV["Person"],
        'foaf:givenName': "Eva Huala"
    })
    phoenix = g.agent(
        ap['phoenix_bioinformatics'], {
            'prov:type': PROV["Organization"],
            'foaf:givenName': "Phoenix Bioinformatics"
        })
    g.actedOnBehalfOf(eva, phoenix)

    # Entity
    # All fields derived from Sources.yml
    # optional - dcterms:citation: Plain text bibliographic citation. If only
    # provided as doi, should we try to validate it?
    datasource2 = g.entity(
        ap['datasource2'], {
            'dcterms:title': "TAIR",
            'dcterms:description': "The Arabidopsis Information Resource",
            'dcterms:language': "en-US",
            'dcterms:identifier': "https://www.arabidopsis.org/",
            'dcterms:citation': "The Arabidopsis Information Resource (TAIR): improved gene annotation and new tools. Nucleic Acids Research 2011 doi: 10.1093/nar/gkr1090"
        })
    g.wasAttributedTo(datasource2, eva)

    # In Sources.yml, these two sources are nested. Define that relationship here
    # There are other types of relationships but we will just use derived
    # from for simplicity in this prototype
    g.wasDerivedFrom(ap['datasource1'], ap['datasource2'])

    # Depending on which ADAMA microservice type we are using, define an
    # activity
    # Eventually, break these into more atomic actions in a chain
    action1 = g.activity(ap['do_query'], datetime.datetime.now())
    # action1 = g.activity(ap['do_map'], datetime.datetime.now())
    # action1 = g.activity(ap['do_generic'], datetime.datetime.now())
    # action1 = g.activity(ap['do_passthrough'], datetime.datetime.now())
    # Future... Support for ADAMA-native microservices
    # action1 = g.activity(ap['generate'], datetime.datetime.now())

    # Define current ADAMA response as an Entity
    # This is what's being returned to the user and is thus the subject of
    # the PROV record
    # May be able to add more attributes to it but this is the minimum
    response = g.entity(ap['adama_response'])

    # Response is generated by the process_query action
    # Time-stamp it!
    g.wasGeneratedBy(response, ap['do_query'], datetime.datetime.now())
    # The process_query used the microservice
    g.used(ap['do_query'], adama_microservice, datetime.datetime.now())
    # The microservice used datasource1
    g.used(adama_microservice, datasource1, datetime.datetime.now())

    # Print prov_n
    print(g.get_provn())
    # Print prov-json
    print(g.serialize())
    # Write out as a pretty picture
    graph = prov.dot.prov_to_dot(g)
    graph.write_png('Sources.png')
class NIDMExporter():
    """
    Generic class to parse a result directory to extract the pieces of
    information to be stored in NIDM-Results and to generate a NIDM-Results
    export.
    """

    def parse(self):
        """
        Parse a result directory to extract the pieces information to be
        stored in NIDM-Results.
        """
        # Methods: find_software, find_model_fitting, find_contrasts and
        # find_inferences should be defined in the children classes and return
        # a list of NIDM Objects as specified in the objects module

        # Object of type Software describing the neuroimaging software package
        # used for the analysis
        self.software = self._find_software()

        # List of objects of type ModelFitting describing the model fitting
        # step in NIDM-Results (main activity: Model Parameters Estimation)
        self.model_fittings = self._find_model_fitting()

        # Dictionary of (key, value) pairs where where key is a tuple
        # containing the identifier of a ModelParametersEstimation object and a
        # tuple of identifiers of ParameterEstimateMap objects and value is an
        # object of type Contrast describing the contrast estimation step in
        # NIDM-Results (main activity: Contrast Estimation)
        self.contrasts = self._find_contrasts()

        # Inference activity and entities
        # Dictionary of (key, value) pairs where key is the identifier of a
        # ContrastEstimation object and value is an object of type Inference
        # describing the inference step in NIDM-Results (main activity:
        # Inference)
        self.inferences = self._find_inferences()

        # Initialise prov document
        self.doc = ProvDocument()
        self._add_namespaces()

    def export(self):
        """
        Generate a NIDM-Results export.
        """
        if not os.path.isdir(self.export_dir):
            os.mkdir(self.export_dir)

        # Initialise main bundle
        self._create_bundle(self.version)

        self.bundle.update(self.software.export())

        # Add model fitting steps
        for model_fitting in self.model_fittings:
            self.bundle.update(model_fitting.export())
            self.bundle.wasAssociatedWith(model_fitting.activity.id,
                                          self.software.id)

        # Add contrast estimation steps
        for (model_fitting_id, pe_ids), contrasts in self.contrasts.items():
            model_fitting = self._get_model_fitting(model_fitting_id)
            for contrast in contrasts:
                self.bundle.update(contrast.export())
                self.bundle.used(contrast.estimation.id,
                                 model_fitting.rms_map.id)
                self.bundle.used(contrast.estimation.id,
                                 model_fitting.mask_map.id)
                self.bundle.wasAssociatedWith(contrast.estimation.id,
                                              self.software.id)
                for pe_id in pe_ids:
                    self.bundle.used(contrast.estimation.id, pe_id)

        # Add inference steps
        for contrast_id, inferences in self.inferences.items():
            contrast = self._get_contrast(contrast_id)
            for inference in inferences:
                self.bundle.update(inference.export())
                # Prefer the z-statistic map when available.
                if contrast.z_stat_map:
                    used_id = contrast.z_stat_map.id
                else:
                    used_id = contrast.stat_map.id
                self.bundle.used(inference.id, used_id)
                self.bundle.wasAssociatedWith(inference.id, self.software.id)

        # Write-out prov file
        self.save_prov_to_files()

        return self.export_dir

    def _get_model_fitting(self, mf_id):
        """
        Retreive model fitting with identifier 'mf_id' from the list of
        model fitting objects stored in self.model_fittings
        """
        for model_fitting in self.model_fittings:
            if model_fitting.activity.id == mf_id:
                return model_fitting
        raise Exception("Model fitting activity with id: "+str(mf_id) +
                        " not found.")

    def _get_contrast(self, con_id):
        """
        Retreive contrast with identifier 'con_id' from the list of contrast
        objects stored in self.contrasts
        """
        for contrasts in self.contrasts.values():
            for contrast in contrasts:
                if contrast.estimation.id == con_id:
                    return contrast
        raise Exception("Contrast activity with id: "+str(con_id) +
                        " not found.")

    def _add_namespaces(self):
        """
        Add namespaces to NIDM document.
        """
        self.doc.add_namespace(NIDM)
        self.doc.add_namespace(NIIRI)
        self.doc.add_namespace(CRYPTO)
        self.doc.add_namespace(DCT)

    def _create_bundle(self, version):
        """
        Initialise NIDM-Results bundle.
        """
        software_lc = self.software.name.lower()
        software_uc = self.software.name.upper()

        bundle_id = NIIRI[str(uuid.uuid4())]
        self.bundle = ProvBundle(identifier=bundle_id)

        # Describe the bundle itself as an entity of the outer document.
        self.doc.entity(bundle_id,
                        other_attributes=((PROV['type'], PROV['Bundle'],),
                                          (PROV['label'],
                                           software_uc + " Results"),
                                          (NIDM['objectModel'],
                                           NIDM[software_uc + 'Results']),
                                          (NIDM['version'], version)))

        # NOTE(review): the generation record references
        # NIIRI[software_lc + '_results_id'] rather than bundle_id — confirm
        # this fixed identifier is intended.
        self.doc.wasGeneratedBy(NIIRI[software_lc + '_results_id'],
                                time=str(datetime.datetime.now().time()))

    def _get_model_parameters_estimations(self, error_model):
        """
        Infer model estimation method from the 'error_model'. Return an object
        of type ModelParametersEstimation.
        """
        if error_model.dependance == INDEPEDENT_CORR:
            if error_model.variance_homo:
                estimation_method = ESTIMATION_OLS
            else:
                estimation_method = ESTIMATION_WLS
        else:
            estimation_method = ESTIMATION_GLS

        mpe = ModelParametersEstimation(estimation_method, self.software.id)

        return mpe

    def save_prov_to_files(self, showattributes=False):
        """
        Write-out provn serialisation to nidm.provn.
        """
        self.doc.add_bundle(self.bundle)
        # NOTE(review): file handle is never closed/flushed explicitly here —
        # consider a 'with' block; confirm callers rely on interpreter cleanup.
        provn_fid = open(os.path.join(self.export_dir, 'nidm.provn'), 'w')
        provn_fid.write(self.doc.get_provn(4))
def get_blank_prov_document():
    """Return a fresh ProvDocument pre-loaded with the shared namespaces."""
    return ProvDocument(namespaces=all_namespaces)


if __name__ == "__main__":
    import argparse

    # CLI entry point: build and export provenance for one MIMIC admission.
    parser = argparse.ArgumentParser(
        description=
        "Process and generate provenance for a MIMIC patient admission")
    parser.add_argument("admission_id", type=int,
                        help="The ID of admission to process")
    args = parser.parse_args()

    prov_doc1 = ProvDocument(namespaces=all_namespaces)
    admission = Admission(prov_doc1, args.admission_id)
    admission.process()

    # Write the serialized document (<admission_id>.json) ...
    filepath = output_path / f"{args.admission_id}.json"
    with filepath.open("w") as f:
        prov_doc1.serialize(f)

    # ... a PROV-N twin (<admission_id>.provn) ...
    provn_content = prov_doc1.get_provn()
    print(provn_content)
    with filepath.with_suffix(".provn").open("w") as f:
        f.write(provn_content)

    # ... and a rendered PDF of the provenance graph.
    dot = prov_to_dot(prov_doc1)
    dot.write_pdf(filepath.with_suffix(".pdf"))

    db.close_session()
def example():
    """Build a sample ADAMA provenance document, print its PROV-N and
    PROV-JSON serializations, and render it to Sources.png."""
    g = ProvDocument()
    # Local namespace
    # Doesnt exist yet so we are creating it
    ap = Namespace('aip', 'https://araport.org/provenance/')
    # Dublin Core
    g.add_namespace("dcterms", "http://purl.org/dc/terms/")
    # FOAF
    g.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")

    # Add sponsors and contributors as Agents
    # ap['matthew_vaughn']
    # aip:matthew_vaughn
    # https://araport.org/provenance/:matthew_vaughn
    # Learn this from a call to profiles service? Adds a dependency on Agave
    # so I am open to figuring out another way
    me = g.agent(ap['matthew_vaughn'], {
        'prov:type': PROV["Person"],
        'foaf:givenName': "Matthew Vaughn",
        'foaf:mbox': "<mailto:[email protected]>"
    })
    # Hard coded for now
    walter = g.agent(ap['walter_moreira'], {
        'prov:type': PROV["Person"],
        'foaf:givenName': "Walter Moreira",
        'foaf:mbox': "<mailto:[email protected]>"
    })
    utexas = g.agent(ap['university_of_texas'], {
        'prov:type': PROV["Organization"],
        'foaf:givenName': "University of Texas at Austin"
    })

    # Set delegation to our host University
    # We may have trouble doing this for other users since we don't always
    # capture their host instituion
    g.actedOnBehalfOf(walter, utexas)
    g.actedOnBehalfOf(me, utexas)

    # Include the ADAMA platform as an Agent and set attribution
    # dcterms:title and dcterms:description are hardcoded
    # dcterms:language is hard-coded
    # dcterms:source is the URI of the public git source repository for ADAMA
    # "dcterms:updated": "2015-04-17T09:44:56" - this would actually be the
    # date ADAMA was updated
    adama_platform = g.agent(ap['adama_platform'],
                             {'dcterms:title': "ADAMA",
                              'dcterms:description': "Araport Data and Microservices API",
                              'dcterms:language':"en-US",
                              'dcterms:identifier':"https://api.araport.org/community/v0.3/",
                              'dcterms:updated': "2015-04-17T09:44:56"
                              })
    g.wasGeneratedBy(adama_platform, walter)

    # Include the ADAMA microservice as an Agent and set attribution+delegation
    # dcterms:title and dcterms:description are inherited from the service's
    # metadata
    # dcterms:language is hard-coded
    # dcterms:identifier is the deployment URI for the service
    # dcterms:source is the URI of the public git source repository. The URL
    # in this example is just a dummy
    #
    # The name for each microservice should be unique. We've decided to
    # use the combination of namespace, service name, and version
    microservice_name = 'mwvaughn/bar_annotation_v1.0.0'
    adama_microservice = g.agent(ap[microservice_name],
                                 {'dcterms:title': "BAR Annotation Service",
                                  'dcterms:description': "Returns annotation from locus ID",
                                  'dcterms:language':"en-US",
                                  'dcterms:identifier':"https://api.araport.org/community/v0.3/mwvaughn/bar_annotation_v1.0.0",
                                  'dcterms:source':"https://github.com/Arabidopsis-Information-Portal/prov-enabled-api-sample"
                                  })

    # the microservice was generated by me on date X (don't use now, use when
    # the service was updated)
    g.wasGeneratedBy(adama_microservice, me, datetime.datetime.now())
    # The microservice used the platform now
    g.used(adama_microservice, adama_platform, datetime.datetime.now())

    # Sources
    #
    # Define BAR
    # Agents
    nick = g.agent(ap['nicholas_provart'], {
        'prov:type': PROV["Person"],
        'foaf:givenName': "Nicholas Provart",
        'foaf:mbox': "*****@*****.**"
    })
    utoronto = g.agent(ap['university_of_toronto'], {
        'prov:type': PROV["Organization"],
        'foaf:givenName': "University of Toronto",
        'dcterms:identifier':"http://www.utoronto.ca/"
    })
    g.actedOnBehalfOf(nick, utoronto)

    # Entity
    # All fields derived from Sources.yml
    # dcterms:title and dcterms:description come straight from the YAML
    # dcterms:identifier - URI pointing to the source's canonical URI
    # representation
    # optional - dcterms:language: Recommended best practice is to use a
    # controlled vocabulary such as RFC 4646
    # optional - dcterms:updated: date the source was published or last updated
    # optional - dcterms:license: Simple string or URI to license. Validate
    # URI if provided?
    datasource1 = g.entity(ap['datasource1'],
                           {'dcterms:title': "BAR Arabidopsis AGI -> Annotation",
                            'dcterms:description': "Most recent annotation for given AGI",
                            'dcterms:language':"en-US",
                            'dcterms:identifier':"http://bar.utoronto.ca/webservices/agiToAnnot.php",
                            'dcterms:updated':"2015-04-17T09:44:56",
                            'dcterms:license':"Creative Commons 3.0"
                            })
    # Set up attribution to Nick
    g.wasAttributedTo(datasource1, nick)

    # Define TAIR
    # Agents
    # dcterms:language: Recommended best practice is to use a controlled
    # vocabulary such as RFC 4646
    eva = g.agent(ap['eva_huala'], {
        'prov:type': PROV["Person"],
        'foaf:givenName': "Eva Huala"
    })
    phoenix = g.agent(ap['phoenix_bioinformatics'], {
        'prov:type': PROV["Organization"],
        'foaf:givenName': "Phoenix Bioinformatics"
    })
    g.actedOnBehalfOf(eva, phoenix)

    # Entity
    # All fields derived from Sources.yml
    # optional - dcterms:citation: Plain text bibliographic citation. If only
    # provided as doi, should we try to validate it?
    datasource2 = g.entity(ap['datasource2'],
                           {'dcterms:title': "TAIR",
                            'dcterms:description': "The Arabidopsis Information Resource",
                            'dcterms:language':"en-US",
                            'dcterms:identifier':"https://www.arabidopsis.org/",
                            'dcterms:citation':"The Arabidopsis Information Resource (TAIR): improved gene annotation and new tools. Nucleic Acids Research 2011 doi: 10.1093/nar/gkr1090"})
    g.wasAttributedTo(datasource2, eva)

    # In Sources.yml, these two sources are nested. Define that relationship here
    # There are other types of relationships but we will just use derived
    # from for simplicity in this prototype
    g.wasDerivedFrom(ap['datasource1'], ap['datasource2'])

    # Depending on which ADAMA microservice type we are using, define an
    # activity
    # Eventually, break these into more atomic actions in a chain
    action1 = g.activity(ap['do_query'], datetime.datetime.now())
    # action1 = g.activity(ap['do_map'], datetime.datetime.now())
    # action1 = g.activity(ap['do_generic'], datetime.datetime.now())
    # action1 = g.activity(ap['do_passthrough'], datetime.datetime.now())
    # Future... Support for ADAMA-native microservices
    # action1 = g.activity(ap['generate'], datetime.datetime.now())

    # Define current ADAMA response as an Entity
    # This is what's being returned to the user and is thus the subject of
    # the PROV record
    # May be able to add more attributes to it but this is the minimum
    response = g.entity(ap['adama_response'])

    # Response is generated by the process_query action
    # Time-stamp it!
    g.wasGeneratedBy(response, ap['do_query'], datetime.datetime.now())
    # The process_query used the microservice
    g.used(ap['do_query'], adama_microservice, datetime.datetime.now())
    # The microservice used datasource1
    g.used(adama_microservice, datasource1, datetime.datetime.now())

    # Print prov_n
    print(g.get_provn())
    # Print prov-json
    print(g.serialize())
    # Write out as a pretty picture
    graph = prov.dot.prov_to_dot(g)
    graph.write_png('Sources.png')
def get_prov():
    """Build a minimal PROV document with the roocs default namespace.

    :return: the PROV-N serialization of the document as a string.
    """
    doc = ProvDocument()
    doc.set_default_namespace('http://roocs.org/')
    # BUG FIX: get_provn() was previously called and its result discarded,
    # so the function always returned None. Return the PROV-N text so
    # callers actually receive the provenance ('get_prov' implies a result).
    return doc.get_provn()
# NOTE(review): the two statements below appear to be the tail of a function
# whose definition lies outside this view — they reference 'logpage',
# 'prov_doc' and 'plate_ident', none of which are defined here, and 'return'
# is only valid inside a function. Confirm against the full file.
logpage_ident = get_logpage(str(logpage), prov_doc)
return plate_ident

# Create a new provenance document
d1 = ProvDocument()
declare_namespaces(d1)
# get V468Cyg
# get_plate
# process = get_process('2180', d1)
try:
    # scan = get_entity('2462','scan', d1)
    # 'id' shadows the builtin here; kept as-is to preserve behavior.
    id = '2180'
    prov_type = 'lightcurve'
    # plate_name = get_entity(id,prov_type, d1)
    # process_name = get_process('9804',d1)
    # logpage_name = get_logpage('10085',d1)
    # source_id = get_source('40000001', d1)
    # plate_ident = get_plate_prov(id, d1)
    # NOTE(review): this rebinding of 'id' assumes get_lightcurve returns a
    # string identifier — confirm; a None return would break the filename below.
    id = get_lightcurve('614-089373', d1)
except TypeError:
    print('the job is still executing...')
print(d1.get_provn())
# Serialize to XML and render the graph as PNG, named after type and id.
filename = 'prov_' + prov_type + id
d1.serialize(filename + '.xml', format='xml')
dot = prov_to_dot(d1)
dot.write_png(filename + '.png')