Example #1
 def __populate_output_dataset_wrappers(self, param_dict, output_datasets, output_paths, job_working_directory):
     output_dataset_paths = dataset_path_rewrites(output_paths)
     for name, hda in output_datasets.items():
         # Write outputs to the working directory (for security purposes)
         # if desired.
         real_path = hda.file_name
         if real_path in output_dataset_paths:
             dataset_path = output_dataset_paths[real_path]
             param_dict[name] = DatasetFilenameWrapper(hda, dataset_path=dataset_path)
             try:
                 open(dataset_path.false_path, 'w').close()
             except EnvironmentError:
                 pass  # May well not exist - e.g. Pulsar.
         else:
             param_dict[name] = DatasetFilenameWrapper(hda)
         # Provide access to a path to store additional files
         # TODO: path munging for cluster/dataset server relocatability
         store_by = getattr(hda.dataset.object_store, "store_by", "id")
         file_name = "dataset_%s_files" % getattr(hda.dataset, store_by)
         param_dict[name].files_path = os.path.abspath(os.path.join(job_working_directory, "working", file_name))
     for out_name, output in self.tool.outputs.items():
         if out_name not in param_dict and output.filters:
             # Assume the reason we lack this output is because a filter
             # failed to pass; for tool writing convenience, provide a
             # NoneDataset
             ext = getattr(output, "format", None)  # populate only for output datasets (not collections)
             param_dict[out_name] = NoneDataset(datatypes_registry=self.app.datatypes_registry, ext=ext)
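
The pattern above keys rewrites by each output's real path and pre-creates the (possibly relocated) false_path so the tool can write to it. A minimal, self-contained sketch of that lookup-and-touch idiom; RewritePath, rewrite_map and touch are stand-ins invented here, not Galaxy APIs:

    from collections import namedtuple

    # Illustrative stand-in for Galaxy's dataset path objects.
    RewritePath = namedtuple("RewritePath", ["real_path", "false_path"])

    def rewrite_map(paths):
        # Like dataset_path_rewrites() above: index rewrites by real path.
        return {p.real_path: p for p in paths}

    def touch(path):
        # Pre-create the output so the tool can write to it; the location
        # may well not exist yet (e.g. Pulsar), hence the broad except.
        try:
            open(path, "w").close()
        except EnvironmentError:
            pass

    rewrites = rewrite_map([RewritePath("/data/7.dat", "/job/outputs/7.dat")])
    touch(rewrites["/data/7.dat"].false_path)
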
Example #2
    def __populate_output_dataset_wrappers(self, param_dict, output_datasets, job_working_directory):
        for name, hda in output_datasets.items():
            # Write outputs to the working directory (for security purposes)
            # if desired.
            param_dict[name] = DatasetFilenameWrapper(hda, compute_environment=self.compute_environment, io_type="output")
            output_path = str(param_dict[name])
            # Conditionally create empty output:
            # - may already exist (e.g. symlink output)
            # - parent directory might not exist (e.g. Pulsar)
            if not os.path.exists(output_path) and os.path.exists(os.path.dirname(output_path)):
                open(output_path, 'w').close()

            # Provide access to a path to store additional files
            # TODO: move compute path logic into compute environment, move setting files_path
            # logic into DatasetFilenameWrapper. Currently this sits in the middle and glues
            # stuff together inconsistently with the way the rest of path rewriting works.
            file_name = hda.dataset.extra_files_path_name
            param_dict[name].files_path = os.path.abspath(os.path.join(job_working_directory, "working", file_name))
        for out_name, output in self.tool.outputs.items():
            if out_name not in param_dict and output.filters:
                # Assume the reason we lack this output is because a filter
                # failed to pass; for tool writing convenience, provide a
                # NoneDataset
                ext = getattr(output, "format", None)  # populate only for output datasets (not collections)
                param_dict[out_name] = NoneDataset(datatypes_registry=self.app.datatypes_registry, ext=ext)
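
Compared with Example #1, this rewrite no longer consults a precomputed rewrite table; the wrapper asks the compute_environment directly, and empty-output creation is guarded by existence checks rather than a try/except. The guard is the interesting part; a hypothetical helper (not part of Galaxy) that isolates it:

    import os

    def ensure_empty_output(output_path):
        # Create an empty placeholder only when it is safe to do so:
        # the file may already exist (e.g. a symlink output), and the
        # parent directory may be absent (e.g. a remote Pulsar job dir).
        if not os.path.exists(output_path) and os.path.exists(os.path.dirname(output_path)):
            open(output_path, "w").close()
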
Example #3
 def __init__(self,
              dataset,
              datatypes_registry=None,
              tool=None,
              name=None,
              compute_environment=None,
              identifier=None,
              io_type="input",
              formats=None):
     if not dataset:
         try:
             # TODO: allow this to work when working with grouping
             ext = tool.inputs[name].extensions[0]
         except Exception:
             ext = 'data'
         self.dataset = wrap_with_safe_string(
             NoneDataset(datatypes_registry=datatypes_registry, ext=ext),
             no_wrap_classes=ToolParameterValueWrapper)
     else:
         # Tool wrappers should not normally be accessing .dataset directly,
         # so we will wrap it and keep the original around for file paths
         # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper?
         if formats:
             target_ext, converted_dataset = dataset.find_conversion_destination(formats)
             if target_ext and converted_dataset:
                 dataset = converted_dataset
         self.unsanitized = dataset
         self.dataset = wrap_with_safe_string(dataset, no_wrap_classes=ToolParameterValueWrapper)
         self.metadata = self.MetadataWrapper(dataset, compute_environment)
         if hasattr(dataset, 'tags'):
             self.groups = {
                 tag.user_value.lower()
                 for tag in dataset.tags if tag.user_tname == 'group'
             }
         else:
             # May be a 'FakeDatasetAssociation'
             self.groups = set()
     self.compute_environment = compute_environment
     # TODO: lazy initialize this...
     self.__io_type = io_type
     if self.__io_type == "input":
         path_rewrite = compute_environment and dataset and compute_environment.input_path_rewrite(dataset)
         if path_rewrite:
             self.false_path = path_rewrite
         else:
             self.false_path = None
     else:
         path_rewrite = compute_environment and compute_environment.output_path_rewrite(dataset)
         if path_rewrite:
             self.false_path = path_rewrite
         else:
             self.false_path = None
     self.datatypes_registry = datatypes_registry
     self._element_identifier = identifier
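
The constructor above touches compute_environment through input_path_rewrite() and output_path_rewrite(), each returning either a rewritten path or a falsy value meaning "keep the real path". A hypothetical stub satisfying just that slice of the interface (the nested MetadataWrapper may demand more of it), handy for exercising the wrapper outside a real job:

    class StubComputeEnvironment:
        # Toy stub; the staging layout here is invented for illustration.
        def __init__(self, prefix="/remote/staging"):
            self.prefix = prefix

        def input_path_rewrite(self, dataset):
            return "%s/inputs/dataset_%s.dat" % (self.prefix, dataset.id)

        def output_path_rewrite(self, dataset):
            return "%s/outputs/dataset_%s.dat" % (self.prefix, dataset.id)
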
Example #4
 def __init__(
     self,
     dataset: Optional[DatasetInstance],
     datatypes_registry: Optional["Registry"] = None,
     tool: Optional["Tool"] = None,
     name: Optional[str] = None,
     compute_environment: Optional["ComputeEnvironment"] = None,
     identifier: Optional[str] = None,
     io_type: str = "input",
     formats: Optional[List[str]] = None,
 ) -> None:
     if not dataset:
         try:
             # TODO: allow this to work when working with grouping
             ext = tool.inputs[name].extensions[0]  # type: ignore[union-attr]
         except Exception:
             ext = "data"
         self.dataset = cast(
             DatasetInstance,
             wrap_with_safe_string(
                 NoneDataset(datatypes_registry=datatypes_registry, ext=ext),
                 no_wrap_classes=ToolParameterValueWrapper,
             ),
         )
     else:
         # Tool wrappers should not normally be accessing .dataset directly,
         # so we will wrap it and keep the original around for file paths
         # Should we name this .value to maintain consistency with most other ToolParameterValueWrapper?
         if formats:
             direct_match, target_ext, converted_dataset = dataset.find_conversion_destination(formats)
             if not direct_match and target_ext and converted_dataset:
                 dataset = converted_dataset
         self.unsanitized: DatasetInstance = dataset
         self.dataset = wrap_with_safe_string(dataset, no_wrap_classes=ToolParameterValueWrapper)
         assert dataset
         self.metadata = self.MetadataWrapper(dataset, compute_environment)
         if isinstance(dataset, HasTags):
             self.groups = {
                 tag.user_value.lower()
                 for tag in dataset.tags if tag.user_tname == 'group'
             }
         else:
             # May be a 'FakeDatasetAssociation'
             self.groups = set()
     self.compute_environment = compute_environment
     # TODO: lazy initialize this...
     self.__io_type = io_type
     if self.__io_type == "input":
         path_rewrite = compute_environment and dataset and compute_environment.input_path_rewrite(dataset)
         if path_rewrite:
             self.false_path = path_rewrite
         else:
             self.false_path = None
     else:
         path_rewrite = compute_environment and compute_environment.output_path_rewrite(dataset)
         if path_rewrite:
             self.false_path = path_rewrite
         else:
             self.false_path = None
     self.datatypes_registry = datatypes_registry
     self._element_identifier = identifier
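
The one behavioral change from Example #3 is in the formats branch: find_conversion_destination() now also reports a direct_match flag, so the converted dataset is substituted only when the original format is not already acceptable. A toy model of that decision; ToyDataset is illustrative, not Galaxy's DatasetInstance:

    from typing import List, Optional, Tuple

    class ToyDataset:
        def __init__(self, ext: str, converted: Optional["ToyDataset"] = None):
            self.ext = ext
            self._converted = converted

        def find_conversion_destination(
            self, formats: List[str]
        ) -> Tuple[bool, Optional[str], Optional["ToyDataset"]]:
            if self.ext in formats:
                return True, None, None  # direct match: keep the original
            if self._converted and self._converted.ext in formats:
                return False, self._converted.ext, self._converted
            return False, None, None  # nothing suitable to convert to

    sam = ToyDataset("sam", converted=ToyDataset("bam"))
    direct_match, target_ext, converted = sam.find_conversion_destination(["bam"])
    dataset = converted if (not direct_match and target_ext and converted) else sam
    assert dataset.ext == "bam"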