def __populate_output_dataset_wrappers(self, param_dict, output_datasets, output_paths, job_working_directory):
    output_dataset_paths = dataset_path_rewrites(output_paths)
    for name, hda in output_datasets.items():
        # Write outputs to the working directory (for security purposes)
        # if desired.
        real_path = hda.file_name
        if real_path in output_dataset_paths:
            dataset_path = output_dataset_paths[real_path]
            param_dict[name] = DatasetFilenameWrapper(hda, dataset_path=dataset_path)
            try:
                open(dataset_path.false_path, 'w').close()
            except EnvironmentError:
                pass  # May well not exist - e.g. Pulsar.
        else:
            param_dict[name] = DatasetFilenameWrapper(hda)
        # Provide access to a path to store additional files
        # TODO: path munging for cluster/dataset server relocatability
        store_by = getattr(hda.dataset.object_store, "store_by", "id")
        file_name = "dataset_%s_files" % getattr(hda.dataset, store_by)
        param_dict[name].files_path = os.path.abspath(os.path.join(job_working_directory, "working", file_name))
    for out_name, output in self.tool.outputs.items():
        if out_name not in param_dict and output.filters:
            # Assume the reason we lack this output is because a filter
            # failed to pass; for tool writing convenience, provide a
            # NoneDataset
            ext = getattr(output, "format", None)  # populate only for output datasets (not collections)
            param_dict[out_name] = NoneDataset(datatypes_registry=self.app.datatypes_registry, ext=ext)

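# Illustrative sketch (not part of the original source): the extra-files
# directory name used above depends on the object store's "store_by"
# setting, which selects whether the dataset's id or uuid is interpolated
# into "dataset_%s_files". The helper name below is hypothetical.
def _example_extra_files_dir_name(dataset, object_store):
    # Mirrors the getattr logic in __populate_output_dataset_wrappers.
    store_by = getattr(object_store, "store_by", "id")  # "id" or "uuid"
    return "dataset_%s_files" % getattr(dataset, store_by)

# e.g. with store_by == "id" and dataset.id == 42 this yields "dataset_42_files".
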
def __populate_output_dataset_wrappers(self, param_dict, output_datasets, output_paths, job_working_directory):
    output_dataset_paths = dataset_path_rewrites( output_paths )
    for name, hda in output_datasets.items():
        # Write outputs to the working directory (for security purposes)
        # if desired.
        real_path = hda.file_name
        if real_path in output_dataset_paths:
            dataset_path = output_dataset_paths[ real_path ]
            param_dict[name] = DatasetFilenameWrapper( hda, dataset_path=dataset_path )
            try:
                open( dataset_path.false_path, 'w' ).close()
            except EnvironmentError:
                pass  # May well not exist - e.g. Pulsar.
        else:
            param_dict[name] = DatasetFilenameWrapper( hda )
        # Provide access to a path to store additional files
        # TODO: path munging for cluster/dataset server relocatability
        param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) ))
        for child in hda.children:
            param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
    for out_name, output in self.tool.outputs.iteritems():
        if out_name not in param_dict and output.filters:
            # Assume the reason we lack this output is because a filter
            # failed to pass; for tool writing convenience, provide a
            # NoneDataset
            ext = getattr( output, "format", None )  # populate only for output datasets (not collections)
            param_dict[ out_name ] = NoneDataset( datatypes_registry=self.app.datatypes_registry, ext=ext )

def __populate_output_dataset_wrappers(self, param_dict, output_datasets, job_working_directory):
    for name, hda in output_datasets.items():
        # Write outputs to the working directory (for security purposes)
        # if desired.
        param_dict[name] = DatasetFilenameWrapper(hda, compute_environment=self.compute_environment, io_type="output")
        output_path = str(param_dict[name])
        # Conditionally create empty output:
        # - may already exist (e.g. symlink output)
        # - parent directory might not exist (e.g. Pulsar)
        if not os.path.exists(output_path) and os.path.exists(os.path.dirname(output_path)):
            open(output_path, 'w').close()

        # Provide access to a path to store additional files
        # TODO: move compute path logic into compute environment, move setting files_path
        # logic into DatasetFilenameWrapper. Currently this sits in the middle and glues
        # stuff together inconsistently with the way the rest of path rewriting works.
        file_name = hda.dataset.extra_files_path_name
        param_dict[name].files_path = os.path.abspath(os.path.join(job_working_directory, "working", file_name))
    for out_name, output in self.tool.outputs.items():
        if out_name not in param_dict and output.filters:
            # Assume the reason we lack this output is because a filter
            # failed to pass; for tool writing convenience, provide a
            # NoneDataset
            ext = getattr(output, "format", None)  # populate only for output datasets (not collections)
            param_dict[out_name] = NoneDataset(datatypes_registry=self.app.datatypes_registry, ext=ext)

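# Illustrative sketch (not part of the original source): the guarded
# "touch" used above, isolated as a standalone helper. The output file is
# only created when it does not already exist (e.g. a symlinked output)
# and its parent directory is present (it may not be, e.g. under Pulsar).
# The helper name is hypothetical.
import os

def _touch_output_if_possible(output_path):
    if not os.path.exists(output_path) and os.path.exists(os.path.dirname(output_path)):
        open(output_path, 'w').close()
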
def test_dataset_false_extra_files_path():
    dataset = cast(DatasetInstance, MockDataset())
    wrapper = DatasetFilenameWrapper(dataset)
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH
    new_path = "/new/path/dataset_123.dat"
    dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path)
    wrapper = DatasetFilenameWrapper(dataset, compute_environment=cast(ComputeEnvironment, MockComputeEnvironment(dataset_path)))
    # Setting false_path is not enough to override
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH
    new_files_path = "/new/path/dataset_123_files"
    wrapper = DatasetFilenameWrapper(dataset, compute_environment=cast(ComputeEnvironment, MockComputeEnvironment(false_path=new_path, false_extra_files_path=new_files_path)))
    assert wrapper.extra_files_path == new_files_path

def test_dataset_wrapper_false_path():
    dataset = MockDataset()
    new_path = "/new/path/dataset_123.dat"
    wrapper = DatasetFilenameWrapper(dataset, dataset_path=Bunch(false_path=new_path))
    assert str(wrapper) == new_path
    assert wrapper.file_name == new_path

def test_dataset_wrapper():
    dataset = MockDataset()
    wrapper = DatasetFilenameWrapper(dataset)
    assert str(wrapper) == MOCK_DATASET_PATH
    assert wrapper.file_name == MOCK_DATASET_PATH
    assert wrapper.ext == MOCK_DATASET_EXT

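# Illustrative sketch (not part of the original source): the tests above
# rely on a MockDataset fixture and MOCK_DATASET_* constants that are not
# shown in this excerpt. A minimal stand-in consistent with the assertions
# might look like the following; the real fixture in the test module may
# define additional attributes (e.g. metadata) that the wrapper touches.
MOCK_DATASET_PATH = "/galaxy/database/files/dataset_1.dat"
MOCK_DATASET_EXTRA_FILES_PATH = "/galaxy/database/files/dataset_1_files"
MOCK_DATASET_EXT = "bed"

class MockDataset:
    # Attribute names mirror what the assertions read back through the wrapper.
    ext = MOCK_DATASET_EXT
    extension = MOCK_DATASET_EXT
    file_name = MOCK_DATASET_PATH
    extra_files_path = MOCK_DATASET_EXTRA_FILES_PATH
    tags = []

    def has_data(self):
        return True
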
def test_dataset_false_extra_files_path():
    dataset = MockDataset()
    wrapper = DatasetFilenameWrapper(dataset)
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH
    new_path = "/new/path/dataset_123.dat"
    dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path)
    wrapper = DatasetFilenameWrapper(dataset, dataset_path=dataset_path)
    # Setting false_path is not enough to override
    assert wrapper.extra_files_path == MOCK_DATASET_EXTRA_FILES_PATH
    new_files_path = "/new/path/dataset_123_files"
    dataset_path = DatasetPath(123, MOCK_DATASET_PATH, false_path=new_path, false_extra_files_path=new_files_path)
    wrapper = DatasetFilenameWrapper(dataset, dataset_path=dataset_path)
    assert wrapper.extra_files_path == new_files_path

def __populate_input_dataset_wrappers(self, param_dict, input_datasets):
    # TODO: Update this method for dataset collections? Need to test. -John.
    # FIXME: when self.check_values==True, input datasets are being wrapped
    #        twice (above and below, creating 2 separate
    #        DatasetFilenameWrapper objects - first is overwritten by
    #        second), is this necessary? - if we get rid of this way to
    #        access children, can we stop this redundancy, or is there
    #        another reason for this?
    #        - Only necessary when self.check_values is False (==external dataset
    #          tool?: can this be abstracted out as part of being a datasource tool?)
    # For now we try to not wrap unnecessarily, but this should be untangled at some point.
    for name, data in input_datasets.items():
        param_dict_value = param_dict.get(name, None)
        if data and param_dict_value is None:
            # We may have a nested parameter that is not fully prefixed.
            # We try recovering from param_dict, but tool authors should really use fully-qualified
            # variables
            wrappers = find_instance_nested(param_dict, instances=(DatasetFilenameWrapper, DatasetListWrapper), match_key=name)
            if len(wrappers) == 1:
                wrapper = wrappers[0]
                param_dict[name] = wrapper
                continue
        if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)):
            wrapper_kwds = dict(
                datatypes_registry=self.app.datatypes_registry,
                tool=self,
                name=name,
                compute_environment=self.compute_environment,
            )
            param_dict[name] = DatasetFilenameWrapper(data, **wrapper_kwds)

def wrap_values(self, inputs, input_values, skip_missing_values=False):
    trans = self.trans
    tool = self.tool
    incoming = self.incoming
    # Wrap tool inputs as necessary
    for input in inputs.itervalues():
        if input.name not in input_values and skip_missing_values:
            continue
        value = input_values[input.name]
        if isinstance(input, Repeat):
            for d in input_values[input.name]:
                self.wrap_values(input.inputs, d, skip_missing_values=skip_missing_values)
        elif isinstance(input, Conditional):
            values = input_values[input.name]
            current = values["__current_case__"]
            self.wrap_values(input.cases[current].inputs, values, skip_missing_values=skip_missing_values)
        elif isinstance(input, Section):
            values = input_values[input.name]
            self.wrap_values(input.inputs, values, skip_missing_values=skip_missing_values)
        elif isinstance(input, DataToolParameter) and input.multiple:
            value = input_values[input.name]
            dataset_instances = DatasetListWrapper.to_dataset_instances(value)
            input_values[ input.name ] = \
                DatasetListWrapper( None,
                                    dataset_instances,
                                    datatypes_registry=trans.app.datatypes_registry,
                                    tool=tool,
                                    name=input.name )
        elif isinstance(input, DataToolParameter):
            input_values[ input.name ] = \
                DatasetFilenameWrapper( input_values[ input.name ],
                                        datatypes_registry=trans.app.datatypes_registry,
                                        tool=tool,
                                        name=input.name )
        elif isinstance(input, SelectToolParameter):
            input_values[input.name] = SelectToolParameterWrapper( input, input_values[input.name], tool.app, other_values=incoming)
        elif isinstance(input, DataCollectionToolParameter):
            input_values[input.name] = DatasetCollectionWrapper(
                None,
                input_values[input.name],
                datatypes_registry=trans.app.datatypes_registry,
                tool=tool,
                name=input.name,
            )
        else:
            input_values[input.name] = InputValueWrapper(input, value, incoming)

def test_dataset_wrapper_false_path():
    dataset = MockDataset()
    new_path = "/new/path/dataset_123.dat"
    wrapper = DatasetFilenameWrapper(dataset, compute_environment=MockComputeEnvironment(false_path=new_path))
    assert str(wrapper) == new_path
    assert wrapper.file_name == new_path

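# Illustrative sketch (not part of the original source): the compute
# environment passed in the tests above is a test double that answers
# path-rewrite queries with the supplied "false" paths. A minimal stand-in
# consistent with how these tests use it might look like the following;
# the method names are an assumption about the ComputeEnvironment
# interface, and the real mock in the test module may differ.
class MockComputeEnvironment:
    def __init__(self, false_path, false_extra_files_path=None):
        self.false_path = false_path
        self.false_extra_files_path = false_extra_files_path

    def input_path_rewrite(self, dataset):
        # Return the rewritten (compute-side) path for an input dataset.
        return self.false_path

    def output_path_rewrite(self, dataset):
        return self.false_path

    def input_extra_files_rewrite(self, dataset):
        # Return the rewritten extra-files directory, if one was supplied.
        return self.false_extra_files_path

    def output_extra_files_rewrite(self, dataset):
        return self.false_extra_files_path
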
def wrap_values(self, inputs, input_values, skip_missing_values=False):
    trans = self.trans
    tool = self.tool
    incoming = self.incoming
    element_identifier_mapper = ElementIdentifierMapper(self._input_datasets)

    # Wrap tool inputs as necessary
    for input in inputs.values():
        if input.name not in input_values and skip_missing_values:
            continue
        value = input_values[input.name]
        copy_identifiers(destination=value, source=input_values)
        if isinstance(input, Repeat):
            for d in value:
                copy_identifiers(destination=d, source=value)
                self.wrap_values(input.inputs, d, skip_missing_values=skip_missing_values)
        elif isinstance(input, Conditional):
            values = value
            current = values["__current_case__"]
            self.wrap_values(input.cases[current].inputs, values, skip_missing_values=skip_missing_values)
        elif isinstance(input, Section):
            values = value
            self.wrap_values(input.inputs, values, skip_missing_values=skip_missing_values)
        elif isinstance(input, DataToolParameter) and input.multiple:
            dataset_instances = DatasetListWrapper.to_dataset_instances(value)
            input_values[input.name] = \
                DatasetListWrapper(None,
                                   dataset_instances,
                                   datatypes_registry=trans.app.datatypes_registry,
                                   tool=tool,
                                   name=input.name,
                                   formats=input.formats)
        elif isinstance(input, DataToolParameter):
            wrapper_kwds = dict(
                datatypes_registry=trans.app.datatypes_registry,
                tool=tool,
                name=input.name,
                formats=input.formats
            )
            element_identifier = element_identifier_mapper.identifier(value, input_values)
            if element_identifier:
                wrapper_kwds["identifier"] = element_identifier
            input_values[input.name] = DatasetFilenameWrapper(value, **wrapper_kwds)
        elif isinstance(input, SelectToolParameter):
            input_values[input.name] = SelectToolParameterWrapper(input, value, other_values=incoming)
        elif isinstance(input, DataCollectionToolParameter):
            input_values[input.name] = DatasetCollectionWrapper(
                None,
                value,
                datatypes_registry=trans.app.datatypes_registry,
                tool=tool,
                name=input.name,
            )
        else:
            input_values[input.name] = InputValueWrapper(input, value, incoming)

def __populate_input_dataset_wrappers(self, param_dict, input_datasets, input_dataset_paths):
    # TODO: Update this method for dataset collections? Need to test. -John.
    # FIXME: when self.check_values==True, input datasets are being wrapped
    #        twice (above and below, creating 2 separate
    #        DatasetFilenameWrapper objects - first is overwritten by
    #        second), is this necessary? - if we get rid of this way to
    #        access children, can we stop this redundancy, or is there
    #        another reason for this?
    #        - Only necessary when self.check_values is False (==external dataset
    #          tool?: can this be abstracted out as part of being a datasource tool?)
    #        - But we still want (ALWAYS) to wrap input datasets (this should be
    #          checked to prevent overhead of creating a new object?)
    # Additionally, datasets go in the param dict. We wrap them such that
    # if the bare variable name is used it returns the filename (for
    # backwards compatibility). We also add any child datasets to the
    # param dict encoded as:
    #   "_CHILD___{dataset_name}___{child_designation}",
    # but this should be considered DEPRECATED, instead use:
    #   $dataset.get_child( 'name' ).filename
    for name, data in input_datasets.items():
        param_dict_value = param_dict.get(name, None)
        if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)):
            wrapper_kwds = dict(
                datatypes_registry=self.app.datatypes_registry,
                tool=self,
                name=name,
            )
            if data:
                real_path = data.file_name
                if real_path in input_dataset_paths:
                    dataset_path = input_dataset_paths[real_path]
                    wrapper_kwds['dataset_path'] = dataset_path
            param_dict[name] = DatasetFilenameWrapper(data, **wrapper_kwds)
        if data:
            for child in data.children:
                param_dict["_CHILD___%s___%s" % (name, child.designation)] = DatasetFilenameWrapper(child)

def __populate_output_dataset_wrappers(self, param_dict, output_datasets, output_paths, job_working_directory):
    output_dataset_paths = dataset_path_rewrites(output_paths)
    for name, hda in output_datasets.items():
        # Write outputs to the working directory (for security purposes)
        # if desired.
        real_path = hda.file_name
        if real_path in output_dataset_paths:
            dataset_path = output_dataset_paths[real_path]
            param_dict[name] = DatasetFilenameWrapper(hda, dataset_path=dataset_path)
            try:
                open(dataset_path.false_path, 'w').close()
            except EnvironmentError:
                pass  # May well not exist - e.g. LWR.
        else:
            param_dict[name] = DatasetFilenameWrapper(hda)
        # Provide access to a path to store additional files
        # TODO: path munging for cluster/dataset server relocatability
        param_dict[name].files_path = os.path.abspath(os.path.join(job_working_directory, "dataset_%s_files" % (hda.dataset.id)))
        for child in hda.children:
            param_dict["_CHILD___%s___%s" % (name, child.designation)] = DatasetFilenameWrapper(child)
    for out_name, output in self.tool.outputs.iteritems():
        if out_name not in param_dict and output.filters:
            # Assume the reason we lack this output is because a filter
            # failed to pass; for tool writing convenience, provide a
            # NoneDataset
            param_dict[out_name] = NoneDataset(datatypes_registry=self.app.datatypes_registry, ext=output.format)

def wrap_input(input_values, input):
    value = input_values[input.name]
    if isinstance(input, DataToolParameter) and input.multiple:
        dataset_instances = DatasetListWrapper.to_dataset_instances(value)
        input_values[input.name] = \
            DatasetListWrapper(job_working_directory,
                               dataset_instances,
                               compute_environment=self.compute_environment,
                               datatypes_registry=self.app.datatypes_registry,
                               tool=self.tool,
                               name=input.name,
                               formats=input.formats)
    elif isinstance(input, DataToolParameter):
        dataset = input_values[input.name]
        wrapper_kwds = dict(
            datatypes_registry=self.app.datatypes_registry,
            tool=self,
            name=input.name,
            compute_environment=self.compute_environment
        )
        element_identifier = element_identifier_mapper.identifier(dataset, param_dict)
        if element_identifier:
            wrapper_kwds["identifier"] = element_identifier
        input_values[input.name] = \
            DatasetFilenameWrapper(dataset, **wrapper_kwds)
    elif isinstance(input, DataCollectionToolParameter):
        dataset_collection = value
        wrapper_kwds = dict(
            datatypes_registry=self.app.datatypes_registry,
            compute_environment=self.compute_environment,
            tool=self,
            name=input.name
        )
        wrapper = DatasetCollectionWrapper(
            job_working_directory,
            dataset_collection,
            **wrapper_kwds
        )
        input_values[input.name] = wrapper
    elif isinstance(input, SelectToolParameter):
        if input.multiple:
            value = listify(value)
        input_values[input.name] = SelectToolParameterWrapper(input, value, other_values=param_dict, compute_environment=self.compute_environment)
    else:
        input_values[input.name] = InputValueWrapper(input, value, param_dict)

def wrap_input(input_values, input):
    value = input_values[input.name]
    if isinstance(input, DataToolParameter) and input.multiple:
        dataset_instances = DatasetListWrapper.to_dataset_instances(value)
        input_values[input.name] = \
            DatasetListWrapper(job_working_directory,
                               dataset_instances,
                               compute_environment=self.compute_environment,
                               datatypes_registry=self.app.datatypes_registry,
                               tool=self.tool,
                               name=input.name,
                               formats=input.formats)
    elif isinstance(input, DataToolParameter):
        # FIXME: We're populating param_dict with conversions when
        #        wrapping values, this should happen as a separate
        #        step before wrapping (or call this wrapping step
        #        something more generic) (but iterating this same
        #        list twice would be wasteful)
        # Add explicit conversions by name to current parent
        for conversion_name, conversion_extensions, conversion_datatypes in input.conversions:
            # If we are at building cmdline step, then converters
            # have already executed
            direct_match, conv_ext, converted_dataset = input_values[input.name].find_conversion_destination(conversion_datatypes)
            # When dealing with optional inputs, we'll provide a
            # valid extension to be used for None converted dataset
            if not direct_match and not conv_ext:
                conv_ext = conversion_extensions[0]
            # input_values[ input.name ] is None when optional
            # dataset, 'conversion' of optional dataset should
            # create wrapper around NoneDataset for converter output
            if input_values[input.name] and not converted_dataset:
                # Input that converter is based from has a value,
                # but converted dataset does not exist
                raise Exception('A path for explicit datatype conversion has not been found: %s --/--> %s' % (input_values[input.name].extension, conversion_extensions))
            else:
                # Trick wrapper into using target conv ext (when
                # None) without actually being a tool parameter
                input_values[conversion_name] = \
                    DatasetFilenameWrapper(converted_dataset,
                                           datatypes_registry=self.app.datatypes_registry,
                                           tool=Bunch(conversion_name=Bunch(extensions=conv_ext)),
                                           name=conversion_name)
        # Wrap actual input dataset
        dataset = input_values[input.name]
        wrapper_kwds = dict(
            datatypes_registry=self.app.datatypes_registry,
            tool=self,
            name=input.name,
            compute_environment=self.compute_environment
        )
        element_identifier = element_identifier_mapper.identifier(dataset, param_dict)
        if element_identifier:
            wrapper_kwds["identifier"] = element_identifier
        input_values[input.name] = \
            DatasetFilenameWrapper(dataset, **wrapper_kwds)
    elif isinstance(input, DataCollectionToolParameter):
        dataset_collection = value
        wrapper_kwds = dict(
            datatypes_registry=self.app.datatypes_registry,
            compute_environment=self.compute_environment,
            tool=self,
            name=input.name
        )
        wrapper = DatasetCollectionWrapper(
            job_working_directory,
            dataset_collection,
            **wrapper_kwds
        )
        input_values[input.name] = wrapper
    elif isinstance(input, SelectToolParameter):
        if input.multiple:
            value = listify(value)
        input_values[input.name] = SelectToolParameterWrapper(input, value, other_values=param_dict, compute_environment=self.compute_environment)
    else:
        input_values[input.name] = InputValueWrapper(input, value, param_dict)

def wrap_input(input_values, input):
    if isinstance(input, DataToolParameter) and input.multiple:
        dataset_instances = input_values[input.name]
        if isinstance(dataset_instances, model.HistoryDatasetCollectionAssociation):
            dataset_instances = dataset_instances.collection.dataset_instances[:]
        input_values[ input.name ] = \
            DatasetListWrapper( dataset_instances,
                                dataset_paths=input_dataset_paths,
                                datatypes_registry=self.app.datatypes_registry,
                                tool=self.tool,
                                name=input.name )
    elif isinstance(input, DataToolParameter):
        ## FIXME: We're populating param_dict with conversions when
        ##        wrapping values, this should happen as a separate
        ##        step before wrapping (or call this wrapping step
        ##        something more generic) (but iterating this same
        ##        list twice would be wasteful)
        # Add explicit conversions by name to current parent
        for conversion_name, conversion_extensions, conversion_datatypes in input.conversions:
            # If we are at building cmdline step, then converters
            # have already executed
            conv_ext, converted_dataset = input_values[input.name].find_conversion_destination(conversion_datatypes)
            # When dealing with optional inputs, we'll provide a
            # valid extension to be used for None converted dataset
            if not conv_ext:
                conv_ext = conversion_extensions[0]
            # input_values[ input.name ] is None when optional
            # dataset, 'conversion' of optional dataset should
            # create wrapper around NoneDataset for converter output
            if input_values[input.name] and not converted_dataset:
                # Input that converter is based from has a value,
                # but converted dataset does not exist
                raise Exception('A path for explicit datatype conversion has not been found: %s --/--> %s' % (input_values[input.name].extension, conversion_extensions))
            else:
                # Trick wrapper into using target conv ext (when
                # None) without actually being a tool parameter
                input_values[ conversion_name ] = \
                    DatasetFilenameWrapper( converted_dataset,
                                            datatypes_registry=self.app.datatypes_registry,
                                            tool=Bunch( conversion_name=Bunch( extensions=conv_ext ) ),
                                            name=conversion_name )
        # Wrap actual input dataset
        dataset = input_values[input.name]
        wrapper_kwds = dict(
            datatypes_registry=self.app.datatypes_registry,
            tool=self,
            name=input.name
        )
        if dataset:
            # A None dataset does not have a filename
            real_path = dataset.file_name
            if real_path in input_dataset_paths:
                wrapper_kwds["dataset_path"] = input_dataset_paths[real_path]
        input_values[ input.name ] = \
            DatasetFilenameWrapper( dataset, **wrapper_kwds )
    elif isinstance(input, DataCollectionToolParameter):
        dataset_collection = input_values[input.name]
        wrapper_kwds = dict(
            datatypes_registry=self.app.datatypes_registry,
            dataset_paths=input_dataset_paths,
            tool=self,
            name=input.name
        )
        wrapper = DatasetCollectionWrapper(dataset_collection, **wrapper_kwds)
        input_values[input.name] = wrapper
    elif isinstance(input, SelectToolParameter):
        input_values[input.name] = SelectToolParameterWrapper(input, input_values[input.name], self.app, other_values=param_dict, path_rewriter=self.unstructured_path_rewriter)
    elif isinstance(input, LibraryDatasetToolParameter):
        # TODO: Handle input rewrites in here? How to test LibraryDatasetToolParameters?
        input_values[input.name] = LibraryDatasetValueWrapper(input, input_values[input.name], param_dict)
    else:
        input_values[input.name] = InputValueWrapper(input, input_values[input.name], param_dict)