def __populate_output_collection_wrappers(self, param_dict, output_collections, output_paths, job_working_directory):
    """
    Expose the job's output collections in ``param_dict``.

    Every entry of ``output_collections`` whose name is declared in
    ``self.tool.output_collections`` is wrapped in a
    ``DatasetCollectionWrapper`` and stored under the collection name.
    Each non-implicit element output declared for that collection is
    additionally stored under the element definition's own name.

    :param param_dict: dictionary being populated; modified in place.
    :param output_collections: mapping of output name to collection.
    :param output_paths: passed to ``dataset_path_rewrites`` to obtain the
        path rewrites handed to each wrapper.
    :param job_working_directory: forwarded to each collection wrapper.
    """
    rewritten_paths = dataset_path_rewrites( output_paths )
    tool = self.tool
    for name, out_collection in output_collections.items():
        # Names the tool does not declare are skipped silently rather than
        # treated as an error.
        if name not in tool.output_collections:
            continue

        collection_wrapper = DatasetCollectionWrapper(
            job_working_directory,
            out_collection,
            datatypes_registry=self.app.datatypes_registry,
            dataset_paths=rewritten_paths,
            tool=tool,
            name=name
        )
        param_dict[ name ] = collection_wrapper

        # TODO: Handle nested collections...
        collection_def = tool.output_collections[ name ]
        for element_identifier, element_def in collection_def.outputs.items():
            if element_def.implicit:
                continue
            element_wrapper = collection_wrapper[ element_identifier ]
            param_dict[ element_def.name ] = element_wrapper
            log.info("Updating param_dict for %s with %s" % (element_def.name, element_wrapper) )
def __populate_output_dataset_wrappers(self, param_dict, output_datasets, output_paths, job_working_directory):
    """
    Expose the job's output datasets in ``param_dict``.

    Each entry of ``output_datasets`` is stored under its name as a
    ``DatasetFilenameWrapper``.  Children of a dataset are stored under
    ``_CHILD___<name>___<designation>``.  Tool outputs that declare filters
    but received no entry are filled in with a ``NoneDataset``.

    :param param_dict: dictionary being populated; modified in place.
    :param output_datasets: mapping of output name to dataset (HDA).
    :param output_paths: passed to ``dataset_path_rewrites``; the result is
        looked up by each dataset's ``file_name``.
    :param job_working_directory: base directory for the per-dataset
        ``dataset_<id>_files`` path.
    """
    output_dataset_paths = dataset_path_rewrites( output_paths )
    for name, hda in output_datasets.items():
        # Write outputs to the working directory (for security purposes)
        # if desired.
        real_path = hda.file_name
        if real_path in output_dataset_paths:
            dataset_path = output_dataset_paths[ real_path ]
            param_dict[name] = DatasetFilenameWrapper( hda, dataset_path=dataset_path )
            try:
                # Opening in 'w' mode creates (or truncates) the rewritten
                # path; the context manager guarantees the handle is closed.
                with open( dataset_path.false_path, 'w' ):
                    pass
            except EnvironmentError:
                pass  # May well not exist - e.g. Pulsar.
        else:
            param_dict[name] = DatasetFilenameWrapper( hda )
        # Provide access to a path to store additional files
        # TODO: path munging for cluster/dataset server relocatability
        param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) ))
        for child in hda.children:
            param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
    # ``items()`` is used (not the Python-2-only ``iteritems()``) so this
    # matches the other loops in this method and runs on Python 2 and 3.
    for out_name, output in self.tool.outputs.items():
        if out_name not in param_dict and output.filters:
            # Assume the reason we lack this output is because a filter
            # failed to pass; for tool writing convenience, provide a
            # NoneDataset
            ext = getattr( output, "format", None )  # populate only for output datasets (not collections)
            param_dict[ out_name ] = NoneDataset( datatypes_registry=self.app.datatypes_registry, ext=ext )
def build_param_dict( self, incoming, input_datasets, output_datasets, output_collections, output_paths, job_working_directory, input_paths=None ):
    """
    Build the dictionary of parameters for substituting into the command
    line. Each value is wrapped in a `InputValueWrapper`, which allows
    all the attributes of the value to be used in the template, *but*
    when the __str__ method is called it actually calls the
    `to_param_dict_string` method of the associated input.

    :param incoming: mapping merged wholesale into the parameter dict.
    :param input_datasets: forwarded to the input wrapper populators.
    :param output_datasets: forwarded to the output dataset populator.
    :param output_collections: forwarded to the output collection populator.
    :param output_paths: forwarded to both output populators.
    :param job_working_directory: forwarded to the wrapper populators.
    :param input_paths: optional iterable handed to
        ``dataset_path_rewrites``; ``None`` (the default) is treated as an
        empty list.
    :returns: the populated parameter dictionary.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default so no
    # list object is shared between calls.
    if input_paths is None:
        input_paths = []

    param_dict = dict()

    def input():
        raise SyntaxError("Unbound variable input.")  # Don't let $input hang Python evaluation process.

    param_dict["input"] = input

    param_dict.update(self.tool.template_macro_params)
    # All parameters go into the param_dict
    param_dict.update( incoming )

    input_dataset_paths = dataset_path_rewrites( input_paths )
    self.__populate_wrappers(param_dict, input_datasets, input_dataset_paths, job_working_directory)
    self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths)
    self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory)
    self.__populate_output_collection_wrappers(param_dict, output_collections, output_paths, job_working_directory)
    self.__populate_unstructured_path_rewrites(param_dict)
    # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames.
    self.__sanitize_param_dict( param_dict )
    # Parameters added after this line are not sanitized
    self.__populate_non_job_params(param_dict)

    # Return the dictionary of parameters
    return param_dict
def __populate_output_collection_wrappers(self, param_dict, output_collections, output_paths, job_working_directory):
    """
    Add wrappers for output collections (and their explicit elements) to
    ``param_dict``.

    Only collections whose name appears in ``self.tool.output_collections``
    are handled; any other name is ignored.  For each handled collection a
    ``DatasetCollectionWrapper`` is stored under the collection name, and
    every element output that is not flagged ``implicit`` is stored under
    the name of its own output definition.

    :param param_dict: dictionary being populated; modified in place.
    :param output_collections: mapping of output name to collection.
    :param output_paths: input to ``dataset_path_rewrites``.
    :param job_working_directory: first argument of each wrapper.
    """
    path_rewrites = dataset_path_rewrites(output_paths)
    tool = self.tool
    for name, out_collection in output_collections.items():
        if name in tool.output_collections:
            wrapper_options = {
                "datatypes_registry": self.app.datatypes_registry,
                "dataset_paths": path_rewrites,
                "tool": tool,
                "name": name,
            }
            wrapped = DatasetCollectionWrapper(job_working_directory, out_collection, **wrapper_options)
            param_dict[name] = wrapped

            # TODO: Handle nested collections...
            declared = tool.output_collections[name]
            for element_identifier, part_def in declared.outputs.items():
                if not part_def.implicit:
                    part_wrapper = wrapped[element_identifier]
                    param_dict[part_def.name] = part_wrapper
                    log.info("Updating param_dict for %s with %s" % (part_def.name, part_wrapper))
def build_param_dict( self, incoming, input_datasets, output_datasets, output_collections, output_paths, job_working_directory, input_paths=None ):
    """
    Build the dictionary of parameters for substituting into the command
    line. Each value is wrapped in a `InputValueWrapper`, which allows
    all the attributes of the value to be used in the template, *but*
    when the __str__ method is called it actually calls the
    `to_param_dict_string` method of the associated input.

    :param incoming: mapping merged wholesale into the parameter dict.
    :param input_datasets: forwarded to the input wrapper populators.
    :param output_datasets: forwarded to the output dataset populator.
    :param output_collections: forwarded to the output collection populator.
    :param output_paths: forwarded to both output populators.
    :param job_working_directory: forwarded to the wrapper populators.
    :param input_paths: optional iterable handed to
        ``dataset_path_rewrites``; ``None`` (the default) is treated as an
        empty list.
    :returns: the populated parameter dictionary.
    """
    # Use a ``None`` sentinel instead of a mutable ``[]`` default so that
    # a single list object is never shared across calls.
    if input_paths is None:
        input_paths = []

    param_dict = dict()

    def input():
        raise SyntaxError("Unbound variable input.")  # Don't let $input hang Python evaluation process.

    param_dict["input"] = input

    param_dict.update(self.tool.template_macro_params)
    # All parameters go into the param_dict
    param_dict.update( incoming )

    input_dataset_paths = dataset_path_rewrites( input_paths )
    self.__populate_wrappers(param_dict, input_datasets, input_dataset_paths, job_working_directory)
    self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths)
    self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory)
    self.__populate_output_collection_wrappers(param_dict, output_collections, output_paths, job_working_directory)
    self.__populate_unstructured_path_rewrites(param_dict)
    # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames.
    self.__sanitize_param_dict( param_dict )
    # Parameters added after this line are not sanitized
    self.__populate_non_job_params(param_dict)

    # Return the dictionary of parameters
    return param_dict
def build_param_dict(self, incoming, input_datasets, output_datasets, output_paths, job_working_directory, input_paths=None):
    """
    Build the dictionary of parameters for substituting into the command
    line. Each value is wrapped in a `InputValueWrapper`, which allows
    all the attributes of the value to be used in the template, *but*
    when the __str__ method is called it actually calls the
    `to_param_dict_string` method of the associated input.

    :param incoming: mapping merged wholesale into the parameter dict.
    :param input_datasets: forwarded to the input dataset populator.
    :param output_datasets: forwarded to the output dataset populator.
    :param output_paths: forwarded to the output dataset populator.
    :param job_working_directory: forwarded to the output dataset populator.
    :param input_paths: optional iterable handed to
        ``dataset_path_rewrites``; ``None`` (the default) is treated as an
        empty list.
    :returns: the populated parameter dictionary.
    """
    # Use a ``None`` sentinel instead of a mutable ``[]`` default so that
    # a single list object is never shared across calls.
    if input_paths is None:
        input_paths = []

    param_dict = dict()
    param_dict.update(self.tool.template_macro_params)
    # All parameters go into the param_dict
    param_dict.update(incoming)

    input_dataset_paths = dataset_path_rewrites(input_paths)
    self.__populate_wrappers(param_dict, input_dataset_paths)
    self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths)
    self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory)
    self.__populate_unstructured_path_rewrites(param_dict)
    self.__populate_non_job_params(param_dict)

    # Return the dictionary of parameters
    return param_dict