import tempfile

# output_properties is assumed to be provided by the module under test.


def test_output_properties_in_memory():
    props = output_properties(content=b"hello world", basename="hello.txt")
    assert props["basename"] == "hello.txt"
    assert props["nameroot"] == "hello"
    assert props["nameext"] == ".txt"
    assert props["size"] == 11
    assert props["checksum"] == "sha1$2aae6c35c94fcfb415dbe95f408b9ce91ee846ed"


def test_output_properties_path():
    # Keep the temporary file open so output_properties can read it by path.
    f = tempfile.NamedTemporaryFile(mode="w")
    f.write("hello world")
    f.flush()
    props = output_properties(path=f.name, basename="hello.txt")
    assert props["basename"] == "hello.txt"
    assert props["nameroot"] == "hello"
    assert props["nameext"] == ".txt"
    assert props["size"] == 11
    assert props["checksum"] == "sha1$2aae6c35c94fcfb415dbe95f408b9ce91ee846ed"
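# For reference, the properties dictionary exercised by these tests has the
# following shape (a sketch derived only from the assertions above; keys beyond
# those asserted may also be present):
#
#     {
#         "basename": "hello.txt",
#         "nameroot": "hello",
#         "nameext": ".txt",
#         "size": 11,
#         "checksum": "sha1$2aae6c35c94fcfb415dbe95f408b9ce91ee846ed",
#     }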
def collect_outputs(self, ctx, output_directory):
    assert self._outputs_dict is None, "collect_outputs pre-condition violated"
    outputs_dict = {}
    if not output_directory:
        # TODO: rather than creating a directory just use
        # Galaxy paths if they are available in this
        # configuration.
        output_directory = tempfile.mkdtemp()

    def get_dataset(dataset_details, filename=None):
        parent_basename = dataset_details.get("cwl_file_name")
        if not parent_basename:
            parent_basename = dataset_details.get("name")
        file_ext = dataset_details["file_ext"]
        if file_ext == "directory":
            # TODO: rename output_directory to outputs_directory because we can have
            # output directories and this is confusing...
            the_output_directory = os.path.join(output_directory, parent_basename)
            safe_makedirs(the_output_directory)
            destination = self.download_output_to(dataset_details, the_output_directory, filename=filename)
        else:
            destination = self.download_output_to(dataset_details, output_directory, filename=filename)
        if filename is None:
            basename = parent_basename
        else:
            basename = os.path.basename(filename)

        return {"path": destination, "basename": basename}

    ctx.vlog("collecting outputs to directory %s" % output_directory)
    for runnable_output in get_outputs(self._runnable, gi=self._user_gi):
        output_id = runnable_output.get_id()
        if not output_id:
            ctx.vlog("Workflow output identified without an ID (label), skipping")
            continue

        output_dict_value = None
        is_cwl = self._runnable.type in [RunnableType.cwl_workflow, RunnableType.cwl_tool]
        output_src = self.output_src(runnable_output)
        if not is_cwl and output_src["src"] == "hda":
            # Non-CWL output backed by a single dataset: download it directly.
            output_dataset_id = output_src["id"]
            dataset = self._get_metadata("dataset", output_dataset_id)
            dataset_dict = get_dataset(dataset)
            ctx.vlog("populated destination [%s]" % dataset_dict["path"])

            if dataset["file_ext"] == "expression.json":
                with open(dataset_dict["path"], "r") as f:
                    output_dict_value = json.load(f)
            else:
                output_dict_value = output_properties(**dataset_dict)
        else:
            output_dataset_id = output_src["id"]
            galaxy_output = self.to_galaxy_output(runnable_output)
            cwl_output = output_to_cwl_json(
                galaxy_output,
                self._get_metadata,
                get_dataset,
                self._get_extra_files,
                pseduo_location=True,  # (sic) keyword spelling expected by output_to_cwl_json
            )
            if is_cwl:
                output_dict_value = cwl_output
            else:
                # For non-CWL collection outputs, recursively attach the CWL-style
                # output objects onto the matching collection elements.
                def attach_file_properties(collection, cwl_output):
                    elements = collection["elements"]
                    assert len(elements) == len(cwl_output)
                    for element, cwl_output_element in zip(elements, cwl_output):
                        element["_output_object"] = cwl_output_element
                        if isinstance(cwl_output_element, list):
                            assert "elements" in element["object"]
                            attach_file_properties(element["object"], cwl_output_element)

                output_metadata = self._get_metadata("dataset_collection", output_dataset_id)
                attach_file_properties(output_metadata, cwl_output)
                output_dict_value = output_metadata

        outputs_dict[output_id] = output_dict_value

    self._outputs_dict = outputs_dict
    ctx.vlog("collected outputs [%s]" % self._outputs_dict)
def collect_outputs(self, ctx, output_directory):
    assert self._outputs_dict is None, "collect_outputs pre-condition violated"
    outputs_dict = {}
    if not output_directory:
        # TODO: rather than creating a directory just use
        # Galaxy paths if they are available in this
        # configuration.
        output_directory = tempfile.mkdtemp()

    def get_dataset(dataset_details, filename=None):
        parent_basename = dataset_details.get("cwl_file_name")
        if not parent_basename:
            parent_basename = dataset_details.get("name")
        file_ext = dataset_details["file_ext"]
        if file_ext == "directory":
            # TODO: rename output_directory to outputs_directory because we can have
            # output directories and this is confusing...
            the_output_directory = os.path.join(output_directory, parent_basename)
            safe_makedirs(the_output_directory)
            destination = self.download_output_to(dataset_details, the_output_directory, filename=filename)
        else:
            destination = self.download_output_to(dataset_details, output_directory, filename=filename)
        if filename is None:
            basename = parent_basename
        else:
            basename = os.path.basename(filename)

        return {"path": destination, "basename": basename}

    ctx.vlog("collecting outputs to directory %s" % output_directory)
    for runnable_output in get_outputs(self._runnable):
        output_id = runnable_output.get_id()
        if not output_id:
            ctx.vlog("Workflow output identified without an ID (label), skipping")
            continue

        output_dict_value = None
        if self._runnable.type in [RunnableType.cwl_workflow, RunnableType.cwl_tool]:
            galaxy_output = self.to_galaxy_output(runnable_output)
            cwl_output = output_to_cwl_json(
                galaxy_output,
                self._get_metadata,
                get_dataset,
                self._get_extra_files,
                pseduo_location=True,  # (sic) keyword spelling expected by output_to_cwl_json
            )
            output_dict_value = cwl_output
        else:
            # TODO: deprecate this route for finding workflow outputs,
            # it is a brittle and bad approach...
            output_dataset_id = self.output_dataset_id(runnable_output)
            dataset = self._get_metadata("dataset", output_dataset_id)
            dataset_dict = get_dataset(dataset)
            ctx.vlog("populated destination [%s]" % dataset_dict["path"])

            if dataset["file_ext"] == "expression.json":
                with open(dataset_dict["path"], "r") as f:
                    output_dict_value = json.load(f)
            else:
                output_dict_value = output_properties(**dataset_dict)

        outputs_dict[output_id] = output_dict_value

    self._outputs_dict = outputs_dict
    ctx.vlog("collected outputs [%s]" % self._outputs_dict)