Beispiel #1
0
def make_workflow_rocrate(workflow_path,
                          wf_type,
                          include_files=[],
                          fetch_remote=False,
                          cwl=None,
                          diagram=None):
    """Return an RO-Crate built around a single workflow template file.

    :param workflow_path: location of the workflow file. It is wrapped in
        ``Path``; the string form and the ``name`` component are both handed
        to ``ROCrate.add_workflow``.
    :param wf_type: workflow language (Galaxy, CWL, Nextflow, ...), passed
        on as ``lang``.
    :param include_files: extra files, each added with ``ROCrate.add_file``.
        The default list is only iterated, never mutated.
    :param fetch_remote: forwarded unchanged to ``ROCrate.add_workflow``.
    :param cwl: CWL/CWL-Abstract representation of the workflow. Only
        tested for ``None`` here: when it is absent, ``gen_cwl=True`` is
        requested from ``add_workflow``.
    :param diagram: an image/graphical workflow representation (per the
        original notes, generated using cwltool when a CWL/CWL-Abstract
        file is provided). Not used by this function.
    :return: the populated ``ROCrate`` object.
    """
    # Property checklist carried over from the design notes:
    #   missing? input, output, programmingLanguage, url, version
    #   sdPublisher - currently set to the person that provided the
    #     metadata, decision to change to the Workflow Hub itself - Done
    #   publisher - where it came from, e.g. Galaxy, github, or WF Hub if
    #     uploaded - Done
    #   producer - to describe the Project or Team - Done
    #   creator - the creators / authors - Done
    #   maintainer - new recommended property to describe the uploader +
    #     additional people with manage rights - Done
    #   funder - example of cordis reference
    #     https://cordis.europa.eu/project/id/730976
    #     https://schema.org/FundingScheme linked to funder
    #     Examples at the bottom of https://schema.org/Grant - funding looks
    #     ideal but not currently legal
    #     Is needed to fulfill the OpenAire "Funding Reference" property
    #   datePublished - becomes an optional property, and we use the date a
    #     DOI was minted (this property is needed for dataCite) - Done
    #   creativeWorkStatus - Maturity level, to be added to BioSchemas - Done
    #   Identifier - can be DOI if this function is enabled in WorkflowHub
    #     - Done

    crate = roc.ROCrate()
    source = Path(workflow_path)
    main_workflow = crate.add_workflow(
        str(source),
        source.name,
        fetch_remote=fetch_remote,
        main=True,
        lang=wf_type,
        gen_cwl=cwl is None,
    )

    # A 'url' property means the source was a remote URL rather than a local
    # path; mirror it into https://schema.org/codeRepository.
    if 'url' in main_workflow.properties():
        main_workflow['codeRepository'] = main_workflow['url']

    # Attach any additional files requested by the caller.
    for extra in include_files:
        crate.add_file(extra)

    return crate
Beispiel #2
0
def make_workflow_rocrate(workflow_path,
                          wf_type,
                          include_files=[],
                          fetch_remote=False,
                          cwl=None,
                          diagram=None):
    """Return an RO-Crate whose main entity is the given workflow file.

    :param workflow_path: location of the workflow file.
    :param wf_type: workflow language (Galaxy, CWL, Nextflow, ...). Only
        ``'CWL'`` and ``'Galaxy'`` get a ``programmingLanguage`` entity;
        any other value produces a crate without that property.
    :param include_files: extra files, each added with ``ROCrate.add_file``.
        The default list is only iterated, never mutated.
    :param fetch_remote: accepted for interface compatibility; not used by
        this implementation.
    :param cwl: CWL/CWL-Abstract representation of the workflow. For Galaxy
        workflows an abstract CWL file is generated only when this is falsy.
    :param diagram: an image/graphical workflow representation (per the
        original notes, generated using cwltool when a CWL/CWL-Abstract
        file is provided). Not used by this function.
    :return: the populated ``ROCrate`` object.
    """
    # Property checklist carried over from the design notes:
    #   missing? input, output, programmingLanguage, url, version
    #   sdPublisher - currently set to the person that provided the
    #     metadata, decision to change to the Workflow Hub itself - Done
    #   publisher - where it came from, e.g. Galaxy, github, or WF Hub if
    #     uploaded - Done
    #   producer - to describe the Project or Team - Done
    #   creator - the creators / authors - Done
    #   maintainer - new recommended property to describe the uploader +
    #     additional people with manage rights - Done
    #   funder - example of cordis reference
    #     https://cordis.europa.eu/project/id/730976
    #     https://schema.org/FundingScheme linked to funder
    #     Examples at the bottom of https://schema.org/Grant - funding looks
    #     ideal but not currently legal
    #     Is needed to fulfill the OpenAire "Funding Reference" property
    #   datePublished - becomes an optional property, and we use the date a
    #     DOI was minted (this property is needed for dataCite) - Done
    #   creativeWorkStatus - Maturity level, to be added to BioSchemas - Done
    #   Identifier - can be DOI if this function is enabled in WorkflowHub
    #     - Done

    wf_crate = roc.ROCrate()
    wf_path = Path(workflow_path)
    # should this be added in a special path within the crate?
    wf_file = Workflow(wf_crate, str(wf_path), wf_path.name)
    wf_crate._add_data_entity(wf_file)
    wf_crate.set_main_entity(wf_file)

    # Must be bound before the check below: without this default, any
    # wf_type other than 'CWL' or 'Galaxy' raised UnboundLocalError.
    programming_language_entity = None
    if wf_type == 'CWL':
        programming_language_entity = entity.Entity(
            wf_crate,
            'https://www.commonwl.org/v1.1/',
            properties={
                "@type": ["ComputerLanguage", "SoftwareApplication"],
                'name': 'CWL',
                'url': 'https://www.commonwl.org/v1.1/',
                'version': '1.1'
            })
    elif wf_type == 'Galaxy':
        if not cwl:
            # No CWL supplied: capture the stdout of get_cwl_interface into
            # a temporary .cwl file and attach it as the abstract workflow.
            with tempfile.NamedTemporaryFile(mode='w',
                                             delete=False,
                                             suffix=".cwl") as f:
                with redirect_stdout(f):
                    get_cwl_interface.main(['1', workflow_path])
            # The file must outlive this call (the crate references it by
            # path), so it is removed at interpreter exit instead.
            atexit.register(os.unlink, f.name)
            abstract_wf_id = wf_path.with_suffix(".cwl").name
            abstract_wf_file = Workflow(wf_crate, f.name, abstract_wf_id)
            wf_crate._add_data_entity(abstract_wf_file)
            wf_file["subjectOf"] = abstract_wf_file
        programming_language_entity = entity.Entity(
            wf_crate, 'https://galaxyproject.org/')
    if programming_language_entity:
        wf_file['programmingLanguage'] = programming_language_entity

    # A 'url' property means the source was a remote URL rather than a local
    # path; mirror it into https://schema.org/codeRepository.
    if 'url' in wf_file.properties():
        wf_file['codeRepository'] = wf_file['url']

    # add extra files
    for file_entry in include_files:
        wf_crate.add_file(file_entry)

    return wf_crate
Beispiel #3
0
    def createResearchObject(self, wf_url, input_files, execution_path,
                             wf_yaml):
        """
        Create RO-crate from execution provenance.

        The crate is written to ``execution_path/self.provenance_path``,
        zipped to ``self.provenance_path`` + ".zip", and the unpacked crate
        directory is removed afterwards.

        :param wf_url: Remote workflow location.
        :type wf_url: str
        :param input_files: Dictionary of input files locations. A value may
            be a single location or a list of locations.
        :type input_files: dict
        :param execution_path: Working directory.
        :type execution_path: str
        :param wf_yaml: YAML filename.
        :type wf_yaml: str
        :raises Exception: if any step fails; the underlying error is logged
            with its traceback and chained as ``__cause__``.
        """
        try:

            # Create RO-Crate
            wf_crate = rocrate.ROCrate()
            wf_file = wf_crate.add_workflow(wf_url,
                                            fetch_remote=True,
                                            main=True)

            # Add url, codeRepository and isBasedOn to RO-crate
            parsed_wf_url = parse.urlparse(wf_url)

            if parsed_wf_url.netloc == "raw.githubusercontent.com":
                # Expected path: /<owner>/<repo>/<ref>/<file path...>.
                # The leading "/" makes the first split component empty.
                wf_path = parsed_wf_url.path.split("/")
                if len(wf_path) < 5:
                    # Previously this surfaced as an opaque TypeError from
                    # concatenating None; fail with a readable reason.
                    raise ValueError(
                        "Incomplete raw GitHub URL (expected "
                        "/<owner>/<repo>/<ref>/<path>): {}".format(wf_url))

                repoURL = parse.urlunparse(
                    ("https", "github.com", "/".join(wf_path[:3]), "", "",
                     ""))
                repoTag = wf_path[3]
                repoRelPath = "/".join(wf_path[4:])

                repoGit = repoURL + "/tree/" + repoTag + "/" + repoRelPath
                # Build the blob URL directly: str.replace("tree", "blob")
                # would also rewrite "tree" inside owner/repo/path names.
                repoBlob = repoURL + "/blob/" + repoTag + "/" + repoRelPath

                wf_file.properties()['url'] = repoBlob
                wf_file.properties()['codeRepository'] = os.path.dirname(
                    repoGit)
                wf_crate.isBasedOn = os.path.dirname(repoGit)

            else:
                # Other hosts are only reported; the crate is still built.
                logger.error(
                    "FIXME: Unsupported http(s) GitHub repository {}".format(
                        parsed_wf_url))

            # Add inputs provenance data to RO-crate
            for in_id, in_value in input_files.items():
                if isinstance(in_value, list):
                    for elem in in_value:
                        self.addInputToResearchObject(wf_crate, in_id, elem)
                else:
                    self.addInputToResearchObject(wf_crate, in_id, in_value)

            # Add outputs provenance data to RO-crate
            # TODO

            wf_crate_path = os.path.join(execution_path, self.provenance_path)
            wf_crate.writeCrate(wf_crate_path)

            # Add YAML file to RO-Crate
            # NOTE(review): the target is self.provenance_path, not
            # wf_crate_path; these only coincide when the current working
            # directory is execution_path - confirm against callers.
            shutil.move(wf_yaml, self.provenance_path)

            # Compress RO-crate to zip
            shutil.make_archive(self.provenance_path, "zip", wf_crate_path)
            shutil.rmtree(wf_crate_path)

        except Exception as exc:
            # Catch Exception rather than everything so KeyboardInterrupt
            # and SystemExit propagate; log the traceback so that "See logs"
            # actually points at the cause.
            errstr = "Cannot create RO-Crate. See logs."
            logger.exception(errstr)
            raise Exception(errstr) from exc
Beispiel #4
0
def make_workflow_rocrate(workflow_path,
                          wf_type,
                          include_files=[],
                          fetch_remote=False,
                          cwl=None,
                          diagram=None):
    """Return an RO-Crate corresponding to a workflow template file.

    :param workflow_path: location of the workflow file; its basename is
        passed as the second argument to ``ROCrate.add_file``.
    :param wf_type: workflow language (Galaxy, CWL, Nextflow, ...). Only
        ``'CWL'`` and ``'Galaxy'`` get a ``programmingLanguage`` entity;
        any other value produces a crate without that property.
    :param include_files: extra files, each added with ``ROCrate.add_file``.
        The default list is only iterated, never mutated.
    :param fetch_remote: accepted for interface compatibility; not used by
        this implementation.
    :param cwl: CWL/CWL-Abstract representation of the workflow. For Galaxy
        workflows an abstract CWL file is generated only when this is falsy.
    :param diagram: an image/graphical workflow representation (per the
        original notes, generated using cwltool when a CWL/CWL-Abstract
        file is provided). Not used by this function.
    :return: the populated ``ROCrate`` object.
    """
    # Local import: needed only for temp-file cleanup in the Galaxy branch.
    import atexit

    # Property checklist carried over from the design notes:
    #   missing? input, output, programmingLanguage, url, version
    #   sdPublisher - currently set to the person that provided the
    #     metadata, decision to change to the Workflow Hub itself - Done
    #   publisher - where it came from, e.g. Galaxy, github, or WF Hub if
    #     uploaded - Done
    #   producer - to describe the Project or Team - Done
    #   creator - the creators / authors - Done
    #   maintainer - new recommended property to describe the uploader +
    #     additional people with manage rights - Done
    #   funder - example of cordis reference
    #     https://cordis.europa.eu/project/id/730976
    #     https://schema.org/FundingScheme linked to funder
    #     Examples at the bottom of https://schema.org/Grant - funding looks
    #     ideal but not currently legal
    #     Is needed to fulfill the OpenAire "Funding Reference" property
    #   datePublished - becomes an optional property, and we use the date a
    #     DOI was minted (this property is needed for dataCite) - Done
    #   creativeWorkStatus - Maturity level, to be added to BioSchemas - Done
    #   Identifier - can be DOI if this function is enabled in WorkflowHub
    #     - Done

    wf_crate = roc.ROCrate()
    # add main workflow file
    file_name = os.path.basename(workflow_path)
    wf_file = wf_crate.add_file(
        workflow_path,
        file_name)  # should I add it in a special path within the crate?
    wf_crate.set_main_entity(wf_file)

    # Must be bound before the check below: without this default, any
    # wf_type other than 'CWL' or 'Galaxy' raised UnboundLocalError.
    programming_language_entity = None
    if wf_type == 'CWL':
        programming_language_entity = entity.Entity(
            wf_crate,
            'https://www.commonwl.org/v1.1/',
            properties={
                "@type": ["ComputerLanguage", "SoftwareApplication"],
                'name': 'CWL',
                'url': 'https://www.commonwl.org/v1.1/',
                'version': '1.1'
            })
    elif wf_type == 'Galaxy':
        if not cwl:
            # No CWL supplied: capture the stdout of get_cwl_interface into
            # a temporary file and add it to the crate as abstract_wf.cwl.
            with tempfile.NamedTemporaryFile(mode='w',
                                             delete=False) as cwl_abstract_out:
                with redirect_stdout(cwl_abstract_out):
                    get_cwl_interface.main(['1', workflow_path])
            # The file must outlive this call (the crate references it by
            # path), so it is removed at interpreter exit instead of leaking.
            atexit.register(os.unlink, cwl_abstract_out.name)
            # NOTE(review): this rebinds wf_file, so the properties set
            # below land on the abstract CWL entity rather than on the main
            # workflow file registered above - confirm this is intended.
            wf_file = wf_crate.add_file(
                cwl_abstract_out.name,
                'abstract_wf.cwl',
                properties={
                    "@type": ["ComputerLanguage", "SoftwareApplication"]
                })
        programming_language_entity = entity.Entity(
            wf_crate, 'https://galaxyproject.org/')

    # SET PROPERTIES
    # A contextual entity representing a SoftwareApplication or
    # ComputerLanguage MUST have a name, url and version, which should
    # indicate a known version the workflow/script was developed or tested
    # with.
    if programming_language_entity:
        wf_file['programmingLanguage'] = programming_language_entity

    # Based on the RO-Crate specification: for workflows, @type is an array
    # with at least File and Workflow as values. A separate local is used
    # so the wf_type parameter is no longer overwritten.
    entity_types = wf_file['@type']
    if not isinstance(entity_types, list):
        entity_types = [entity_types]
    for required_type in ('Workflow', 'SoftwareSourceCode'):
        if required_type not in entity_types:
            entity_types.append(required_type)
    wf_file['@type'] = entity_types

    # A 'url' property means the source was a remote URL rather than a local
    # path; mirror it into https://schema.org/codeRepository.
    if 'url' in wf_file.properties():
        wf_file['codeRepository'] = wf_file['url']

    # add extra files
    for file_entry in include_files:
        wf_crate.add_file(file_entry)

    return wf_crate
Beispiel #5
0
# Load the JSON arguments file; only its 'location' entry is read here.
with open(argsFile) as f:
    data = json.load(f)

# Location of the workflow file to package.
wf_path = data['location']
files_list = []

# Workflow.TYPES=["File", "SoftwareSourceCode"]

# The commented-out call below created an additional HTML preview file in
# previous versions. In the current version the preview file is not created,
# so the equivalent steps are spelled out below with gen_preview=True.

# wf_crate = rocrate_api.make_workflow_rocrate(workflow_path=wf_path,wf_type="CWL",include_files=files_list)

# No separate CWL representation is supplied, so gen_cwl below is True.
cwl = None
wf_crate = roc.ROCrate(gen_preview=True)
workflow_path = Path(wf_path)
# Register the workflow as the crate's main entity, declared as CWL.
wf_file = wf_crate.add_workflow(str(workflow_path),
                                workflow_path.name,
                                fetch_remote=False,
                                main=True,
                                lang="CWL",
                                gen_cwl=(cwl is None))

# If the source is a remote URL rather than a local path, the workflow
# entity has a 'url' property; mirror it into
# https://schema.org/codeRepository.
if 'url' in wf_file.properties():
    wf_file['codeRepository'] = wf_file['url']

# add extra files