def make_workflow_rocrate(workflow_path, wf_type, include_files=None,
                          fetch_remote=False, cwl=None, diagram=None):
    """Return an ROCrate whose main entity is the given workflow file.

    Args:
        workflow_path: Location of the workflow definition. It is wrapped in
            ``Path``; its final component becomes the destination name in
            the crate.
        wf_type: Workflow language, forwarded to ``add_workflow`` as ``lang``
            (Galaxy, CWL, Nextflow, ...).
        include_files: Optional iterable of extra files; each one is passed
            to ``ROCrate.add_file``. ``None`` (the default) adds nothing.
        fetch_remote: Forwarded unchanged to ``add_workflow``.
        cwl: CWL/CWL-Abstract representation of the workflow. Only its
            presence is inspected here: ``gen_cwl`` is requested from
            ``add_workflow`` exactly when ``cwl`` is ``None``.
        diagram: An image/graphical workflow representation. Currently
            unused by this function.

    Returns:
        The populated ``ROCrate`` object.
    """
    # Status of the workflow properties discussed for the crate:
    #   missing? input, output, programmingLanguage, url, version
    #   sdPublisher - current set to the person that provided the metadata,
    #       decision to change to the Workflow Hub itself - Done
    #   publisher - where it came from, e.g. Galaxy, github, or WF Hub
    #       if uploaded - Done
    #   producer - to describe the Project or Team - Done
    #   creator - the creators / authors - Done
    #   maintainer - new recommended property to describe the uploader +
    #       additional people with manage rights - Done
    #   funder - example of cordis reference
    #       https://cordis.europa.eu/project/id/730976
    #       https://schema.org/FundingScheme linked to funder
    #       Examples at the bottom of https://schema.org/Grant - funding
    #       looks ideal but not currently legal
    #       Is needed to fulfill the OpenAire “Funding Reference” property
    #   datePublished - becomes an optional property, and we use the date a
    #       DOI was minted (this property is needed for dataCite) - Done
    #   creativeWorkStatus - Maturity level, to be added to BioSchemas - Done
    #   Identifier - can be DOI if this function is enabled in
    #       WorkflowHub - Done
    wf_crate = roc.ROCrate()
    workflow_path = Path(workflow_path)
    wf_file = wf_crate.add_workflow(
        str(workflow_path), workflow_path.name, fetch_remote=fetch_remote,
        main=True, lang=wf_type, gen_cwl=(cwl is None)
    )

    # if the source is a remote URL then add https://schema.org/codeRepository
    # property to it; this can be checked by checking if the source is a URL
    # instead of a local path
    if 'url' in wf_file.properties():
        wf_file['codeRepository'] = wf_file['url']

    # add extra files (a None default avoids sharing one list across calls)
    for file_entry in (include_files if include_files is not None else ()):
        wf_crate.add_file(file_entry)
    return wf_crate
def make_workflow_rocrate(workflow_path, wf_type, include_files=None,
                          fetch_remote=False, cwl=None, diagram=None):
    """Return an ROCrate whose main entity is the given workflow file.

    Args:
        workflow_path: Location of the workflow definition. Its final path
            component is used as the entity name inside the crate.
        wf_type: Workflow language. ``'CWL'`` and ``'Galaxy'`` get a
            ``programmingLanguage`` entity attached; any other value
            (Nextflow, ...) leaves that property unset.
        include_files: Optional iterable of extra files; each one is passed
            to ``ROCrate.add_file``. ``None`` (the default) adds nothing.
        fetch_remote: Accepted for interface compatibility; not used by
            this implementation.
        cwl: CWL/CWL-Abstract representation of the workflow. For Galaxy
            workflows an abstract CWL interface is generated only when this
            is falsy.
        diagram: An image/graphical workflow representation. Currently
            unused by this function.

    Returns:
        The populated ``ROCrate`` object.
    """
    # Status of the workflow properties discussed for the crate:
    #   missing? input, output, programmingLanguage, url, version
    #   sdPublisher - current set to the person that provided the metadata,
    #       decision to change to the Workflow Hub itself - Done
    #   publisher - where it came from, e.g. Galaxy, github, or WF Hub
    #       if uploaded - Done
    #   producer - to describe the Project or Team - Done
    #   creator - the creators / authors - Done
    #   maintainer - new recommended property to describe the uploader +
    #       additional people with manage rights - Done
    #   funder - example of cordis reference
    #       https://cordis.europa.eu/project/id/730976
    #       https://schema.org/FundingScheme linked to funder
    #       Examples at the bottom of https://schema.org/Grant - funding
    #       looks ideal but not currently legal
    #       Is needed to fulfill the OpenAire “Funding Reference” property
    #   datePublished - becomes an optional property, and we use the date a
    #       DOI was minted (this property is needed for dataCite) - Done
    #   creativeWorkStatus - Maturity level, to be added to BioSchemas - Done
    #   Identifier - can be DOI if this function is enabled in
    #       WorkflowHub - Done
    wf_crate = roc.ROCrate()
    wf_path = Path(workflow_path)
    # should this be added in a special path within the crate?
    wf_file = Workflow(wf_crate, str(wf_path), wf_path.name)
    wf_crate._add_data_entity(wf_file)
    wf_crate.set_main_entity(wf_file)

    # Must be bound for every wf_type: the check further down would otherwise
    # raise UnboundLocalError for languages other than CWL and Galaxy.
    programming_language_entity = None
    if wf_type == 'CWL':
        programming_language_entity = entity.Entity(
            wf_crate, 'https://www.commonwl.org/v1.1/',
            properties={
                "@type": ["ComputerLanguage", "SoftwareApplication"],
                'name': 'CWL',
                'url': 'https://www.commonwl.org/v1.1/',
                'version': '1.1'
            }
        )
    if wf_type == 'Galaxy':
        if not cwl:
            # create cwl_abstract: get_cwl_interface prints to stdout, so
            # stdout is redirected into a temporary .cwl file
            with tempfile.NamedTemporaryFile(mode='w', delete=False,
                                             suffix=".cwl") as f:
                with redirect_stdout(f):
                    get_cwl_interface.main(['1', workflow_path])
            # the file has to outlive this function (the crate only
            # references it), so remove it at interpreter exit
            atexit.register(os.unlink, f.name)
            abstract_wf_id = wf_path.with_suffix(".cwl").name
            abstract_wf_file = Workflow(wf_crate, f.name, abstract_wf_id)
            wf_crate._add_data_entity(abstract_wf_file)
            wf_file["subjectOf"] = abstract_wf_file
        programming_language_entity = entity.Entity(
            wf_crate, 'https://galaxyproject.org/'
        )
    if programming_language_entity:
        wf_file['programmingLanguage'] = programming_language_entity

    # if the source is a remote URL then add https://schema.org/codeRepository
    # property to it; this can be checked by checking if the source is a URL
    # instead of a local path
    if 'url' in wf_file.properties():
        wf_file['codeRepository'] = wf_file['url']

    # add extra files (a None default avoids sharing one list across calls)
    for file_entry in (include_files if include_files is not None else ()):
        wf_crate.add_file(file_entry)
    return wf_crate
def createResearchObject(self, wf_url, input_files, execution_path, wf_yaml):
    """
    Create RO-crate from execution provenance.

    The crate is written to ``execution_path``/``self.provenance_path``,
    archived as a zip named after ``self.provenance_path`` and the
    uncompressed crate directory is then removed.

    :param wf_url: Remote workflow location.
    :type wf_url: str
    :param input_files: Dictionary of input files locations. A value may be
        a single location or a list of locations.
    :type input_files: dict
    :param execution_path: Working directory.
    :type execution_path: str
    :param wf_yaml: YAML filename.
    :type wf_yaml: str
    :raises Exception: if any step of the crate creation fails; the
        original error is chained as ``__cause__`` and logged.
    """
    try:
        # Create RO-Crate
        wf_crate = rocrate.ROCrate()
        wf_file = wf_crate.add_workflow(wf_url, fetch_remote=True, main=True)

        # Add url, codeRepository and isBasedOn to RO-crate
        parsed_wf_url = parse.urlparse(wf_url)
        wf_path = parsed_wf_url.path.split("/")

        if parsed_wf_url.netloc == "raw.githubusercontent.com":
            # The path starts with "/", so the split yields
            # ['', <owner>, <repo>, <ref>, <relative path parts>...]
            if len(wf_path) < 5:
                raise ValueError(
                    "Incomplete raw GitHub URL {}".format(wf_url))
            repoURL = parse.urlunparse(
                ("https", "github.com", "/".join(wf_path[:3]), "", "", ""))
            repoTag = wf_path[3]
            repoRelPath = "/".join(wf_path[4:])

            repoGit = repoURL + "/tree/" + repoTag + "/" + repoRelPath
            # Build the blob URL explicitly: a textual replace of "tree"
            # would also rewrite owner/repo/ref/path names containing it.
            repoBlob = repoURL + "/blob/" + repoTag + "/" + repoRelPath
            repoDir = os.path.dirname(repoGit)

            wf_file.properties()['url'] = repoBlob
            wf_file.properties()['codeRepository'] = repoDir
            wf_crate.isBasedOn = repoDir
        else:
            logger.error(
                "FIXME: Unsupported http(s) GitHub repository {}".format(
                    parsed_wf_url))

        # Add inputs provenance data to RO-crate
        for in_id, in_value in input_files.items():
            in_elems = in_value if isinstance(in_value, list) else [in_value]
            for elem in in_elems:
                self.addInputToResearchObject(wf_crate, in_id, elem)

        # Add outputs provenance data to RO-crate
        # TODO

        wf_crate_path = os.path.join(execution_path, self.provenance_path)
        wf_crate.writeCrate(wf_crate_path)

        # Add YAML file to RO-Crate
        shutil.move(wf_yaml, self.provenance_path)

        # Compress RO-crate to zip
        shutil.make_archive(self.provenance_path, "zip", wf_crate_path)
        shutil.rmtree(wf_crate_path)

    except Exception as e:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        errstr = "Cannot create RO-Crate. See logs."
        # logger.exception records the traceback the message points to
        logger.exception(errstr)
        raise Exception(errstr) from e
def make_workflow_rocrate(workflow_path, wf_type, include_files=None,
                          fetch_remote=False, cwl=None, diagram=None):
    """Return an ROCrate whose main entity is the given workflow file.

    Args:
        workflow_path: Location of the workflow definition. Its base name is
            used as the destination name inside the crate.
        wf_type: Workflow language. ``'CWL'`` and ``'Galaxy'`` get a
            ``programmingLanguage`` entity attached; any other value
            (Nextflow, ...) leaves that property unset.
        include_files: Optional iterable of extra files; each one is passed
            to ``ROCrate.add_file``. ``None`` (the default) adds nothing.
        fetch_remote: Accepted for interface compatibility; not used by
            this implementation.
        cwl: CWL/CWL-Abstract representation of the workflow. For Galaxy
            workflows an abstract CWL interface is generated only when this
            is falsy.
        diagram: An image/graphical workflow representation. Currently
            unused by this function.

    Returns:
        The populated ``ROCrate`` object.
    """
    # Status of the workflow properties discussed for the crate:
    #   missing? input, output, programmingLanguage, url, version
    #   sdPublisher - current set to the person that provided the metadata,
    #       decision to change to the Workflow Hub itself - Done
    #   publisher - where it came from, e.g. Galaxy, github, or WF Hub
    #       if uploaded - Done
    #   producer - to describe the Project or Team - Done
    #   creator - the creators / authors - Done
    #   maintainer - new recommended property to describe the uploader +
    #       additional people with manage rights - Done
    #   funder - example of cordis reference -
    #       https://cordis.europa.eu/project/id/730976
    #       https://schema.org/FundingScheme linked to funder
    #       Examples at the bottom of https://schema.org/Grant - funding
    #       looks ideal but not currently legal
    #       Is needed to fulfill the OpenAire “Funding Reference” property
    #   datePublished - becomes an optional property, and we use the date a
    #       DOI was minted (this property is needed for dataCite) - Done
    #   creativeWorkStatus - Maturity level, to be added to BioSchemas - Done
    #   Identifier - can be DOI if this function is enabled in
    #       WorkflowHub - Done

    # Imported locally so this block does not depend on a module-level
    # import that may not exist in the file.
    import atexit

    wf_crate = roc.ROCrate()

    # add main workflow file
    file_name = os.path.basename(workflow_path)
    # should I add it in a special path within the crate?
    wf_file = wf_crate.add_file(workflow_path, file_name)
    wf_crate.set_main_entity(wf_file)

    # Must be bound for every wf_type: the check further down would otherwise
    # raise UnboundLocalError for languages other than CWL and Galaxy.
    programming_language_entity = None
    if wf_type == 'CWL':
        programming_language_entity = entity.Entity(
            wf_crate, 'https://www.commonwl.org/v1.1/',
            properties={
                "@type": ["ComputerLanguage", "SoftwareApplication"],
                'name': 'CWL',
                'url': 'https://www.commonwl.org/v1.1/',
                'version': '1.1'
            }
        )
    if wf_type == 'Galaxy':
        if not cwl:
            # create cwl_abstract: get_cwl_interface prints to stdout, so
            # stdout is redirected into a temporary file
            with tempfile.NamedTemporaryFile(
                    mode='w', delete=False) as cwl_abstract_out:
                with redirect_stdout(cwl_abstract_out):
                    get_cwl_interface.main(['1', workflow_path])
            # the file has to outlive this function (the crate only
            # references it), so remove it at interpreter exit instead of
            # leaking it
            atexit.register(os.unlink, cwl_abstract_out.name)
            # NOTE(review): this rebinds wf_file, so the properties set below
            # land on the abstract CWL entity rather than on the main
            # workflow entity - confirm this is intended.
            wf_file = wf_crate.add_file(
                cwl_abstract_out.name, 'abstract_wf.cwl',
                properties={
                    "@type": ["ComputerLanguage", "SoftwareApplication"]
                }
            )
        programming_language_entity = entity.Entity(
            wf_crate, 'https://galaxyproject.org/'
        )

    # SET PROPERTIES
    # A contextual entity representing a SoftwareApplication or
    # ComputerLanguage MUST have a name, url and version, which should
    # indicate a known version the workflow/script was developed or tested
    # with
    if programming_language_entity:
        wf_file['programmingLanguage'] = programming_language_entity

    # based on ro-crate specification, for workflows: @type is an array with
    # at least File and Workflow as values.
    # (kept in its own name so the wf_type parameter is not shadowed)
    entity_types = wf_file['@type']
    if not isinstance(entity_types, list):
        entity_types = [entity_types]
    for required_type in ('Workflow', 'SoftwareSourceCode'):
        if required_type not in entity_types:
            entity_types.append(required_type)
    wf_file['@type'] = entity_types

    # if the source is a remote URL then add https://schema.org/codeRepository
    # property to it; this can be checked by checking if the source is a URL
    # instead of a local path
    if 'url' in wf_file.properties():
        wf_file['codeRepository'] = wf_file['url']

    # add extra files (a None default avoids sharing one list across calls)
    for file_entry in (include_files if include_files is not None else ()):
        wf_crate.add_file(file_entry)
    return wf_crate
# Read the JSON arguments file; its 'location' entry is the workflow path.
with open(argsFile) as f:
    data = json.load(f)
wf_path = data['location']
# Placeholder for extra files; only referenced by the commented-out call below.
files_list = []
# Workflow.TYPES=["File", "SoftwareSourceCode"]
# The commented command below created an additional html preview file in
# previous versions. In the current version the preview file is not created,
# therefore we call the commands below to include it.
# wf_crate = rocrate_api.make_workflow_rocrate(workflow_path=wf_path,wf_type="CWL",include_files=files_list)
# No CWL representation is supplied, so gen_cwl below evaluates to True.
cwl = None
# Build the crate by hand, asking for the preview file explicitly.
wf_crate = roc.ROCrate(gen_preview=True)
workflow_path = Path(wf_path)
# Register the workflow as the crate's main entity, declared as CWL.
wf_file = wf_crate.add_workflow(str(workflow_path), workflow_path.name,
                                fetch_remote=False, main=True, lang="CWL",
                                gen_cwl=(cwl is None))
# if the source is a remote URL then add https://schema.org/codeRepository
# property to it; this can be checked by checking if the source is a URL
# instead of a local path
if 'url' in wf_file.properties():
    wf_file['codeRepository'] = wf_file['url']
# add extra files