def __populate_output_dataset_wrappers(self, param_dict, output_datasets, output_paths, job_working_directory):
    output_dataset_paths = dataset_path_rewrites(output_paths)
    for name, hda in output_datasets.items():
        # Write outputs to the working directory (for security purposes)
        # if desired.
        real_path = hda.file_name
        if real_path in output_dataset_paths:
            dataset_path = output_dataset_paths[real_path]
            param_dict[name] = DatasetFilenameWrapper(hda, dataset_path=dataset_path)
            try:
                open(dataset_path.false_path, 'w').close()
            except EnvironmentError:
                pass  # May well not exist - e.g. Pulsar.
        else:
            param_dict[name] = DatasetFilenameWrapper(hda)
        # Provide access to a path to store additional files
        # TODO: path munging for cluster/dataset server relocatability
        store_by = getattr(hda.dataset.object_store, "store_by", "id")
        file_name = "dataset_%s_files" % getattr(hda.dataset, store_by)
        param_dict[name].files_path = os.path.abspath(os.path.join(job_working_directory, "working", file_name))

    for out_name, output in self.tool.outputs.items():
        if out_name not in param_dict and output.filters:
            # Assume the reason we lack this output is because a filter
            # failed to pass; for tool writing convenience, provide a
            # NoneDataset
            ext = getattr(output, "format", None)  # populate only for output datasets (not collections)
            param_dict[out_name] = NoneDataset(datatypes_registry=self.app.datatypes_registry, ext=ext)
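# The rewrite lookup above keys each rewritten output by its real path. A minimal
# standalone sketch of that shape, assuming a hypothetical DatasetPath stand-in
# (not the real Galaxy class) that pairs a server-side path with an optional
# rewritten path inside the job working directory:

from typing import Dict, List, NamedTuple, Optional


class DatasetPath(NamedTuple):
    # Hypothetical stand-in: the real path on the Galaxy server plus an
    # optional false path used inside the job working directory.
    real_path: str
    false_path: Optional[str] = None


def dataset_path_rewrites(output_paths: List[DatasetPath]) -> Dict[str, DatasetPath]:
    # Index only the paths that actually carry a rewrite, keyed by real path,
    # matching the dictionary the method above consults.
    return {dp.real_path: dp for dp in output_paths if dp.false_path}


paths = [
    DatasetPath("/data/dataset_1.dat", "/work/galaxy_dataset_1.dat"),
    DatasetPath("/data/dataset_2.dat"),
]
rewrites = dataset_path_rewrites(paths)
assert "/data/dataset_1.dat" in rewrites
assert "/data/dataset_2.dat" not in rewrites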
def __populate_output_dataset_wrappers(self, param_dict, output_datasets, job_working_directory):
    for name, hda in output_datasets.items():
        # Write outputs to the working directory (for security purposes)
        # if desired.
        param_dict[name] = DatasetFilenameWrapper(hda, compute_environment=self.compute_environment, io_type="output")
        output_path = str(param_dict[name])
        # Conditionally create empty output:
        # - may already exist (e.g. symlink output)
        # - parent directory might not exist (e.g. Pulsar)
        if not os.path.exists(output_path) and os.path.exists(os.path.dirname(output_path)):
            open(output_path, 'w').close()

        # Provide access to a path to store additional files
        # TODO: move compute path logic into compute environment, move setting files_path
        # logic into DatasetFilenameWrapper. Currently this sits in the middle and glues
        # stuff together inconsistently with the way the rest of path rewriting works.
        file_name = hda.dataset.extra_files_path_name
        param_dict[name].files_path = os.path.abspath(os.path.join(job_working_directory, "working", file_name))

    for out_name, output in self.tool.outputs.items():
        if out_name not in param_dict and output.filters:
            # Assume the reason we lack this output is because a filter
            # failed to pass; for tool writing convenience, provide a
            # NoneDataset
            ext = getattr(output, "format", None)  # populate only for output datasets (not collections)
            param_dict[out_name] = NoneDataset(datatypes_registry=self.app.datatypes_registry, ext=ext)
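# The conditional empty-output creation above can be exercised in isolation.
# A minimal sketch using only the standard library (touch_output and the temp
# paths are illustrative names, not part of the codebase):

import os
import tempfile


def touch_output(output_path):
    # Mirror the guard above: skip if the file already exists (e.g. a symlinked
    # output) or if the parent directory is absent (e.g. Pulsar, where the
    # remote job directory is created later).
    if not os.path.exists(output_path) and os.path.exists(os.path.dirname(output_path)):
        open(output_path, 'w').close()


with tempfile.TemporaryDirectory() as work_dir:
    path = os.path.join(work_dir, "dataset_1.dat")
    touch_output(path)  # parent exists, file does not -> created empty
    assert os.path.exists(path)
    touch_output(path)  # already exists -> no-op
    missing = os.path.join(work_dir, "no_such_dir", "dataset_2.dat")
    touch_output(missing)  # parent directory missing -> skipped
    assert not os.path.exists(missing)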
def __init__(self, dataset, datatypes_registry=None, tool=None, name=None, compute_environment=None, identifier=None, io_type="input", formats=None):
    if not dataset:
        try:
            # TODO: allow this to work when working with grouping
            ext = tool.inputs[name].extensions[0]
        except Exception:
            ext = 'data'
        self.dataset = wrap_with_safe_string(
            NoneDataset(datatypes_registry=datatypes_registry, ext=ext),
            no_wrap_classes=ToolParameterValueWrapper)
    else:
        # Tool wrappers should not normally be accessing .dataset directly,
        # so we will wrap it and keep the original around for file paths
        # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper?
        if formats:
            target_ext, converted_dataset = dataset.find_conversion_destination(formats)
            if target_ext and converted_dataset:
                dataset = converted_dataset
        self.unsanitized = dataset
        self.dataset = wrap_with_safe_string(dataset, no_wrap_classes=ToolParameterValueWrapper)
        self.metadata = self.MetadataWrapper(dataset, compute_environment)
        if hasattr(dataset, 'tags'):
            self.groups = {tag.user_value.lower() for tag in dataset.tags if tag.user_tname == 'group'}
        else:
            # May be a 'FakeDatasetAssociation'
            self.groups = set()
    self.compute_environment = compute_environment
    # TODO: lazy initialize this...
    self.__io_type = io_type
    if self.__io_type == "input":
        path_rewrite = compute_environment and dataset and compute_environment.input_path_rewrite(dataset)
        if path_rewrite:
            self.false_path = path_rewrite
        else:
            self.false_path = None
    else:
        path_rewrite = compute_environment and compute_environment.output_path_rewrite(dataset)
        if path_rewrite:
            self.false_path = path_rewrite
        else:
            self.false_path = None
    self.datatypes_registry = datatypes_registry
    self._element_identifier = identifier
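# The formats branch above swaps in a converted dataset when the wrapped one
# does not already satisfy the allowed formats. A minimal sketch of that
# fallback with hypothetical stand-ins (FakeDataset mimics the two-value
# find_conversion_destination used here; the typed variant below returns a
# direct_match flag as well):

class FakeDataset:
    # Hypothetical dataset with an extension and an optional pre-computed
    # converted sibling (e.g. bam -> sam).
    def __init__(self, ext, converted=None):
        self.ext = ext
        self.converted = converted

    def find_conversion_destination(self, formats):
        # Return (target_ext, converted_dataset), as in the older API above.
        if self.ext in formats:
            return None, None  # already acceptable, nothing to convert
        if self.converted and self.converted.ext in formats:
            return self.converted.ext, self.converted
        return None, None


sam = FakeDataset("sam")
dataset = FakeDataset("bam", converted=sam)
target_ext, converted_dataset = dataset.find_conversion_destination(["sam"])
if target_ext and converted_dataset:
    dataset = converted_dataset  # same swap performed in __init__ above
assert dataset is sam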
def __init__(
    self,
    dataset: Optional[DatasetInstance],
    datatypes_registry: Optional["Registry"] = None,
    tool: Optional["Tool"] = None,
    name: Optional[str] = None,
    compute_environment: Optional["ComputeEnvironment"] = None,
    identifier: Optional[str] = None,
    io_type: str = "input",
    formats: Optional[List[str]] = None,
) -> None:
    if not dataset:
        try:
            # TODO: allow this to work when working with grouping
            ext = tool.inputs[name].extensions[0]  # type: ignore[union-attr]
        except Exception:
            ext = "data"
        self.dataset = cast(
            DatasetInstance,
            wrap_with_safe_string(
                NoneDataset(datatypes_registry=datatypes_registry, ext=ext),
                no_wrap_classes=ToolParameterValueWrapper,
            ),
        )
    else:
        # Tool wrappers should not normally be accessing .dataset directly,
        # so we will wrap it and keep the original around for file paths
        # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper?
        if formats:
            direct_match, target_ext, converted_dataset = dataset.find_conversion_destination(formats)
            if not direct_match and target_ext and converted_dataset:
                dataset = converted_dataset
        self.unsanitized: DatasetInstance = dataset
        self.dataset = wrap_with_safe_string(dataset, no_wrap_classes=ToolParameterValueWrapper)
        assert dataset
        self.metadata = self.MetadataWrapper(dataset, compute_environment)
        if isinstance(dataset, HasTags):
            self.groups = {tag.user_value.lower() for tag in dataset.tags if tag.user_tname == 'group'}
        else:
            # May be a 'FakeDatasetAssociation'
            self.groups = set()
    self.compute_environment = compute_environment
    # TODO: lazy initialize this...
    self.__io_type = io_type
    if self.__io_type == "input":
        path_rewrite = compute_environment and dataset and compute_environment.input_path_rewrite(dataset)
        if path_rewrite:
            self.false_path = path_rewrite
        else:
            self.false_path = None
    else:
        path_rewrite = compute_environment and compute_environment.output_path_rewrite(dataset)
        if path_rewrite:
            self.false_path = path_rewrite
        else:
            self.false_path = None
    self.datatypes_registry = datatypes_registry
    self._element_identifier = identifier
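# The io_type branch at the end of __init__ reduces to a small dispatch over
# the compute environment. A minimal standalone sketch with hypothetical
# stand-ins (StubComputeEnvironment, resolve_false_path, and FakeDataset are
# illustrative, not the real interfaces):

from typing import Optional


class StubComputeEnvironment:
    # Stand-in for the compute environment consulted above; a real one
    # rewrites paths for the target compute resource.
    def input_path_rewrite(self, dataset) -> Optional[str]:
        return "/staging/inputs/dataset_%s.dat" % dataset.id

    def output_path_rewrite(self, dataset) -> Optional[str]:
        return "/staging/outputs/dataset_%s.dat" % dataset.id


def resolve_false_path(compute_environment, dataset, io_type: str) -> Optional[str]:
    # Same logic as the tail of __init__: rewrite via the compute environment
    # when one is supplied, otherwise leave false_path unset.
    if io_type == "input":
        path_rewrite = compute_environment and dataset and compute_environment.input_path_rewrite(dataset)
    else:
        path_rewrite = compute_environment and compute_environment.output_path_rewrite(dataset)
    return path_rewrite or None


class FakeDataset:
    id = 42


assert resolve_false_path(StubComputeEnvironment(), FakeDataset(), "input") == "/staging/inputs/dataset_42.dat"
assert resolve_false_path(None, FakeDataset(), "output") is None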