Example 1
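The starting point: collect_outputs downloads each runnable output into output_directory (a fresh temporary directory when none is given) and records, per output ID, either the parsed JSON value of an expression.json dataset or a CWL-style File dictionary.
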
    def collect_outputs(self, output_directory):
        assert self._outputs_dict is None

        outputs_dict = {}
        if not output_directory:
            # TODO: rather than creating a directory just use
            # Galaxy paths if they are available in this
            # configuration.
            output_directory = tempfile.mkdtemp()

        for output in get_outputs(self._runnable):
            output_id = output.get_id()
            dataset = self.get_dataset_metadata(output)
            destination = self.download_output_to(output, output_directory)

            if dataset["file_ext"] == "expression.json":
                with open(destination, "r") as f:
                    dict_value = json.load(f)
            else:
                dict_value = {
                    "path": destination,
                    "class": "File",
                }
            outputs_dict[output_id] = dict_value
        self._outputs_dict = outputs_dict
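
For orientation, a minimal sketch of what self._outputs_dict might hold after this version runs; the output IDs, path, and value below are hypothetical:

# Hypothetical contents of self._outputs_dict (illustrative values only).
outputs_dict = {
    "output_file": {"path": "/tmp/tmpabc123/output_file", "class": "File"},
    "count_output": 42,  # an expression.json output, stored as its parsed JSON value
}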
Example 2
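A unit test for get_outputs on a single CWL tool: the tool behind A_CWL_TOOL is expected to declare exactly one output, with the ID "output_file".
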
def test_outputs():
    outputs = get_outputs(for_path(A_CWL_TOOL))
    assert len(outputs) == 1
    output_id = outputs[0].get_id()
    assert output_id == "output_file"
Example 3
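This revision threads a ctx object through for verbose logging, factors the download logic into a get_dataset helper that also handles directory outputs, and routes CWL runnables through output_to_cwl_json instead of the plain File dictionary.
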
    def collect_outputs(self, ctx, output_directory):
        assert self._outputs_dict is None, "collect_outputs pre-condition violated"

        outputs_dict = {}
        if not output_directory:
            # TODO: rather than creating a directory just use
            # Galaxy paths if they are available in this
            # configuration.
            output_directory = tempfile.mkdtemp()

        def get_dataset(dataset_details, filename=None):
            parent_basename = dataset_details.get("cwl_file_name")
            if not parent_basename:
                parent_basename = dataset_details.get("name")
            file_ext = dataset_details["file_ext"]
            if file_ext == "directory":
                # TODO: rename output_directory to outputs_directory because we can have output directories
                # and this is confusing...
                the_output_directory = os.path.join(output_directory, parent_basename)
                safe_makedirs(the_output_directory)
                destination = self.download_output_to(dataset_details, the_output_directory, filename=filename)
            else:
                destination = self.download_output_to(dataset_details, output_directory, filename=filename)
            if filename is None:
                basename = parent_basename
            else:
                basename = os.path.basename(filename)

            return {"path": destination, "basename": basename}

        ctx.vlog("collecting outputs to directory %s" % output_directory)
        for runnable_output in get_outputs(self._runnable):
            output_id = runnable_output.get_id()
            output_dict_value = None
            if self._runnable.type in [RunnableType.cwl_workflow, RunnableType.cwl_tool]:
                galaxy_output = self.to_galaxy_output(runnable_output)
                cwl_output = output_to_cwl_json(
                    galaxy_output,
                    self._get_metadata,
                    get_dataset,
                    self._get_extra_files,
                    pseduo_location=True,
                )
                output_dict_value = cwl_output
            else:
                # TODO: deprecate this route for finding workflow outputs,
                # it is a brittle and bad approach...
                output_dataset_id = self.output_dataset_id(runnable_output)
                dataset = self._get_metadata("dataset", output_dataset_id)
                dataset_dict = get_dataset(dataset)
                ctx.vlog("populated destination [%s]" % dataset_dict["path"])

                if dataset["file_ext"] == "expression.json":
                    with open(dataset_dict["path"], "r") as f:
                        output_dict_value = json.load(f)
                else:
                    output_dict_value = output_properties(**dataset_dict)

            outputs_dict[output_id] = output_dict_value

        self._outputs_dict = outputs_dict
        ctx.vlog("collected outputs [%s]" % self._outputs_dict)
Example 4
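Identical to Example 3 except that workflow outputs lacking an ID (label) are now logged and skipped rather than collected.
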
    def collect_outputs(self, ctx, output_directory):
        assert self._outputs_dict is None, "collect_outputs pre-condition violated"

        outputs_dict = {}
        if not output_directory:
            # TODO: rather than creating a directory just use
            # Galaxy paths if they are available in this
            # configuration.
            output_directory = tempfile.mkdtemp()

        def get_dataset(dataset_details, filename=None):
            parent_basename = dataset_details.get("cwl_file_name")
            if not parent_basename:
                parent_basename = dataset_details.get("name")
            file_ext = dataset_details["file_ext"]
            if file_ext == "directory":
                # TODO: rename output_directory to outputs_directory because we can have output directories
                # and this is confusing...
                the_output_directory = os.path.join(output_directory, parent_basename)
                safe_makedirs(the_output_directory)
                destination = self.download_output_to(dataset_details, the_output_directory, filename=filename)
            else:
                destination = self.download_output_to(dataset_details, output_directory, filename=filename)
            if filename is None:
                basename = parent_basename
            else:
                basename = os.path.basename(filename)

            return {"path": destination, "basename": basename}

        ctx.vlog("collecting outputs to directory %s" % output_directory)
        for runnable_output in get_outputs(self._runnable):
            output_id = runnable_output.get_id()
            if not output_id:
                ctx.vlog("Workflow output identified without an ID (label), skipping")
                continue
            output_dict_value = None
            if self._runnable.type in [RunnableType.cwl_workflow, RunnableType.cwl_tool]:
                galaxy_output = self.to_galaxy_output(runnable_output)
                cwl_output = output_to_cwl_json(
                    galaxy_output,
                    self._get_metadata,
                    get_dataset,
                    self._get_extra_files,
                    pseduo_location=True,
                )
                output_dict_value = cwl_output
            else:
                # TODO: deprecate this route for finding workflow outputs,
                # it is a brittle and bad approach...
                output_dataset_id = self.output_dataset_id(runnable_output)
                dataset = self._get_metadata("dataset", output_dataset_id)
                dataset_dict = get_dataset(dataset)
                ctx.vlog("populated destination [%s]" % dataset_dict["path"])

                if dataset["file_ext"] == "expression.json":
                    with open(dataset_dict["path"], "r") as f:
                        output_dict_value = json.load(f)
                else:
                    output_dict_value = output_properties(**dataset_dict)

            outputs_dict[output_id] = output_dict_value

        self._outputs_dict = outputs_dict
        ctx.vlog("collected outputs [%s]" % self._outputs_dict)
Example 5
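get_outputs now receives the user's Galaxy instance (gi=self._user_gi), and outputs are distinguished via output_src: plain "hda" datasets of non-CWL runnables keep the old path, while everything else goes through output_to_cwl_json, with non-CWL collections getting the CWL output objects attached element by element through attach_file_properties.
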
    def collect_outputs(self, ctx, output_directory):
        assert self._outputs_dict is None, "collect_outputs pre-condition violated"

        outputs_dict = {}
        if not output_directory:
            # TODO: rather than creating a directory just use
            # Galaxy paths if they are available in this
            # configuration.
            output_directory = tempfile.mkdtemp()

        def get_dataset(dataset_details, filename=None):
            parent_basename = dataset_details.get("cwl_file_name")
            if not parent_basename:
                parent_basename = dataset_details.get("name")
            file_ext = dataset_details["file_ext"]
            if file_ext == "directory":
                # TODO: rename output_directory to outputs_directory because we can have output directories
                # and this is confusing...
                the_output_directory = os.path.join(output_directory, parent_basename)
                safe_makedirs(the_output_directory)
                destination = self.download_output_to(dataset_details, the_output_directory, filename=filename)
            else:
                destination = self.download_output_to(dataset_details, output_directory, filename=filename)
            if filename is None:
                basename = parent_basename
            else:
                basename = os.path.basename(filename)

            return {"path": destination, "basename": basename}

        ctx.vlog("collecting outputs to directory %s" % output_directory)

        for runnable_output in get_outputs(self._runnable, gi=self._user_gi):
            output_id = runnable_output.get_id()
            if not output_id:
                ctx.vlog("Workflow output identified without an ID (label), skipping")
                continue
            output_dict_value = None
            is_cwl = self._runnable.type in [RunnableType.cwl_workflow, RunnableType.cwl_tool]
            output_src = self.output_src(runnable_output)
            if not is_cwl and output_src["src"] == "hda":
                output_dataset_id = output_src["id"]
                dataset = self._get_metadata("dataset", output_dataset_id)
                dataset_dict = get_dataset(dataset)
                ctx.vlog("populated destination [%s]" % dataset_dict["path"])

                if dataset["file_ext"] == "expression.json":
                    with open(dataset_dict["path"], "r") as f:
                        output_dict_value = json.load(f)
                else:
                    output_dict_value = output_properties(**dataset_dict)
            else:
                output_dataset_id = output_src["id"]
                galaxy_output = self.to_galaxy_output(runnable_output)
                cwl_output = output_to_cwl_json(
                    galaxy_output,
                    self._get_metadata,
                    get_dataset,
                    self._get_extra_files,
                    pseduo_location=True,
                )
                if is_cwl:
                    output_dict_value = cwl_output
                else:

                    def attach_file_properties(collection, cwl_output):
                        elements = collection["elements"]
                        assert len(elements) == len(cwl_output)
                        for element, cwl_output_element in zip(elements, cwl_output):
                            element["_output_object"] = cwl_output_element
                            if isinstance(cwl_output_element, list):
                                assert "elements" in element["object"]
                                attach_file_properties(element["object"], cwl_output_element)

                    output_metadata = self._get_metadata("dataset_collection", output_dataset_id)
                    attach_file_properties(output_metadata, cwl_output)
                    output_dict_value = output_metadata

            outputs_dict[output_id] = output_dict_value

        self._outputs_dict = outputs_dict
        ctx.vlog("collected outputs [%s]" % self._outputs_dict)
Example 6
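The workflow counterpart of the tool test above: A_CWL_WORKFLOW is expected to expose a single output, labelled "count_output".
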
def test_outputs():
    outputs = get_outputs(for_path(A_CWL_WORKFLOW))
    assert len(outputs) == 1
    output_id = outputs[0].get_id()
    assert output_id == "count_output"
Example 7
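The final version drops the pre-condition assert, defaults output_directory with a single or-expression, passes ctx into download_output_to, routes every output through output_to_cwl_json (plain hda datasets now reuse the CWL representation directly), and skips outputs for which older galaxy-tool-util releases raise an AssertionError.
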
    def collect_outputs(self, ctx, output_directory):
        outputs_dict = {}
        # TODO: rather than creating a directory just use
        # Galaxy paths if they are available in this
        # configuration.
        output_directory = output_directory or tempfile.mkdtemp()

        def get_dataset(dataset_details, filename=None):
            parent_basename = dataset_details.get("cwl_file_name") or dataset_details.get("name")
            file_ext = dataset_details["file_ext"]
            if file_ext == "directory":
                # TODO: rename output_directory to outputs_directory because we can have output directories
                # and this is confusing...
                the_output_directory = os.path.join(output_directory, parent_basename)
                safe_makedirs(the_output_directory)
                destination = self.download_output_to(ctx, dataset_details, the_output_directory, filename=filename)
            else:
                destination = self.download_output_to(ctx, dataset_details, output_directory, filename=filename)
            if filename is None:
                basename = parent_basename
            else:
                basename = os.path.basename(filename)

            return {"path": destination, "basename": basename}

        ctx.vlog("collecting outputs to directory %s" % output_directory)

        for runnable_output in get_outputs(self._runnable, gi=self._user_gi):
            output_id = runnable_output.get_id()
            if not output_id:
                ctx.vlog("Workflow output identified without an ID (label), skipping")
                continue
            output_dict_value = None
            is_cwl = self._runnable.type in [RunnableType.cwl_workflow, RunnableType.cwl_tool]
            output_src = self.output_src(runnable_output)
            output_dataset_id = output_src["id"]
            galaxy_output = self.to_galaxy_output(runnable_output)
            try:
                cwl_output = output_to_cwl_json(
                    galaxy_output,
                    self._get_metadata,
                    get_dataset,
                    self._get_extra_files,
                    pseduo_location=True,
                )
            except AssertionError:
                # galaxy-tool-util < 21.05 raises an AssertionError from output_to_cwl_json
                # when the output state is not OK; remove this workaround once a newer
                # galaxy-tool-util release is required.
                continue
            if is_cwl or output_src["src"] == "hda":
                output_dict_value = cwl_output
            else:

                def attach_file_properties(collection, cwl_output):
                    elements = collection["elements"]
                    assert len(elements) == len(cwl_output)
                    for element, cwl_output_element in zip(elements, cwl_output):
                        element["_output_object"] = cwl_output_element
                        if isinstance(cwl_output_element, list):
                            assert "elements" in element["object"]
                            attach_file_properties(element["object"], cwl_output_element)

                output_metadata = self._get_metadata("dataset_collection", output_dataset_id)
                attach_file_properties(output_metadata, cwl_output)
                output_dict_value = output_metadata

            outputs_dict[output_id] = output_dict_value

        self._outputs_dict = outputs_dict
        ctx.vlog("collected outputs [%s]" % self._outputs_dict)