def build( self ): """ Build runtime description of job to execute, evaluate command and config templates corresponding to this tool with these inputs on this compute environment. """ self.extra_filenames = [] self.command_line = None try: self.__build_config_files( ) except Exception as e: # capture and log parsing errors global_tool_errors.add_error(self.tool.config_file, "Building Config Files", e) raise e try: self.__build_param_file( ) except Exception as e: # capture and log parsing errors global_tool_errors.add_error(self.tool.config_file, "Building Param File", e) raise e try: self.__build_command_line( ) except Exception as e: # capture and log parsing errors global_tool_errors.add_error(self.tool.config_file, "Building Command Line", e) raise e try: self.__build_environment_variables() except Exception as e: global_tool_errors.add_error(self.tool.config_file, "Building Environment Variables", e) raise e return self.command_line, self.extra_filenames, self.environment_variables
def build(self): """ Build runtime description of job to execute, evaluate command and config templates corresponding to this tool with these inputs on this compute environment. """ self.extra_filenames = [] self.command_line = None try: self.__build_config_files() except Exception as e: # capture and log parsing errors global_tool_errors.add_error(self.tool.config_file, "Building Config Files", e) raise e try: self.__build_param_file() except Exception as e: # capture and log parsing errors global_tool_errors.add_error(self.tool.config_file, "Building Param File", e) raise e try: self.__build_command_line() except Exception as e: # capture and log parsing errors global_tool_errors.add_error(self.tool.config_file, "Building Command Line", e) raise e try: self.__build_environment_variables() except Exception as e: global_tool_errors.add_error(self.tool.config_file, "Building Environment Variables", e) raise e return self.command_line, self.extra_filenames, self.environment_variables
# capture and log parsing errors global_tool_errors.add_error(self.tool.config_file, "Building Config Files", e) raise e try: self.__build_param_file() except Exception, e: # capture and log parsing errors global_tool_errors.add_error(self.tool.config_file, "Building Param File", e) raise e try: self.__build_command_line() except Exception, e: # capture and log parsing errors global_tool_errors.add_error(self.tool.config_file, "Building Command Line", e) raise e try: self.__build_environment_variables() except Exception, e: global_tool_errors.add_error(self.tool.config_file, "Building Environment Variables", e) raise e return self.command_line, self.extra_filenames, self.environment_variables def __build_command_line(self): """ Build command line to invoke this tool given a populated param_dict """ command = self.tool.command
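# NOTE: this excerpt begins mid-module; the imports below are a sketch of
# what the code appears to rely on. The names are real Galaxy identifiers,
# but the exact module paths are assumptions and may need adjusting to the
# actual source tree.
import logging
import os

from galaxy import model
from galaxy.tools import global_tool_errors  # assumed path
from galaxy.tools.parameters import wrap_with_safe_string  # assumed path
from galaxy.tools.parameters.basic import (
    DataCollectionToolParameter,
    DataToolParameter,
    SelectToolParameter,
)
from galaxy.tools.parameters.grouping import Conditional, Repeat, Section
from galaxy.tools.wrappers import (  # assumed path
    DatasetCollectionWrapper,
    DatasetFilenameWrapper,
    DatasetListWrapper,
    InputValueWrapper,
    RawObjectWrapper,
    SelectToolParameterWrapper,
    ToolParameterValueWrapper,
)
from galaxy.util.bunch import Bunch
from galaxy.util.none_like import NoneDataset
# dataset_path_rewrites maps real dataset paths to rewritten job paths;
# assumed to come from the job datasets module.
from galaxy.jobs.datasets import dataset_path_rewrites

log = logging.getLogger(__name__)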
class ToolEvaluator(object):
    """
    An abstraction linking together a tool and a job runtime to evaluate
    tool inputs in an isolated, testable manner.
    """

    def __init__(self, app, tool, job, local_working_directory):
        self.app = app
        self.job = job
        self.tool = tool
        self.local_working_directory = local_working_directory

    def set_compute_environment(self, compute_environment, get_special=None):
        """
        Set up the compute environment and establish the outline of the
        param_dict for evaluating command and config Cheetah templates.
        """
        self.compute_environment = compute_environment
        self.unstructured_path_rewriter = compute_environment.unstructured_path_rewriter()

        job = self.job
        incoming = dict([(p.name, p.value) for p in job.parameters])
        incoming = self.tool.params_from_strings(incoming, self.app)
        # Do any validation that could not be done at job creation
        self.tool.handle_unvalidated_param_values(incoming, self.app)

        # Restore input / output data lists
        inp_data = dict([(da.name, da.dataset) for da in job.input_datasets])
        out_data = dict([(da.name, da.dataset) for da in job.output_datasets])
        inp_data.update([(da.name, da.dataset) for da in job.input_library_datasets])
        out_data.update([(da.name, da.dataset) for da in job.output_library_datasets])

        out_collections = dict([(obj.name, obj.dataset_collection_instance) for obj in job.output_dataset_collection_instances])
        out_collections.update([(obj.name, obj.dataset_collection) for obj in job.output_dataset_collections])

        if get_special:
            # Set up output dataset association for export history jobs. Because job
            # uses a Dataset rather than an HDA or LDA, it's necessary to set up a
            # fake dataset association that provides the needed attributes for
            # preparing a job.
            class FakeDatasetAssociation(object):

                def __init__(self, dataset=None):
                    self.dataset = dataset
                    self.file_name = dataset.file_name
                    self.metadata = dict()
                    self.children = []

            special = get_special()
            if special:
                out_data["output_file"] = FakeDatasetAssociation(dataset=special.dataset)

        # These can be passed on the command line if wanted as $__user_*__
        incoming.update(model.User.user_template_environment(job.history and job.history.user))

        # Build params, done before hook so the hook can use them
        param_dict = self.build_param_dict(
            incoming,
            inp_data,
            out_data,
            output_collections=out_collections,
            output_paths=compute_environment.output_paths(),
            job_working_directory=compute_environment.working_directory(),
            input_paths=compute_environment.input_paths()
        )

        # Certain tools require tasks to be completed prior to job execution
        # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
        self.tool.exec_before_job(self.app, inp_data, out_data, param_dict)
        # Run the before queue ("exec_before_job") hook
        self.tool.call_hook('exec_before_job', self.app, inp_data=inp_data,
                            out_data=out_data, tool=self.tool, param_dict=incoming)

        self.param_dict = param_dict
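    # A minimal usage sketch (illustrative, not taken from calling code): a
    # job wrapper typically constructs the evaluator, binds it to a compute
    # environment, then builds the runtime description:
    #
    #     evaluator = ToolEvaluator(app, tool, job, working_directory)
    #     evaluator.set_compute_environment(compute_environment)
    #     command_line, extra_filenames, env_vars = evaluator.build()
    #
    # set_compute_environment() must run first: build() templates against
    # self.param_dict, which is only populated there.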
    def build_param_dict(self, incoming, input_datasets, output_datasets, output_collections, output_paths, job_working_directory, input_paths=[]):
        """
        Build the dictionary of parameters for substituting into the command
        line. Each value is wrapped in an `InputValueWrapper`, which allows
        all the attributes of the value to be used in the template, *but*
        when the __str__ method is called it actually calls the
        `to_param_dict_string` method of the associated input.
        """
        param_dict = dict()

        def input():
            raise SyntaxError("Unbound variable input.")  # Don't let $input hang Python evaluation process.

        param_dict["input"] = input
        param_dict.update(self.tool.template_macro_params)
        # All parameters go into the param_dict
        param_dict.update(incoming)

        input_dataset_paths = dataset_path_rewrites(input_paths)
        self.__populate_wrappers(param_dict, input_dataset_paths)
        self.__populate_input_dataset_wrappers(param_dict, input_datasets, input_dataset_paths)
        self.__populate_output_dataset_wrappers(param_dict, output_datasets, output_paths, job_working_directory)
        self.__populate_output_collection_wrappers(param_dict, output_collections, output_paths, job_working_directory)
        self.__populate_unstructured_path_rewrites(param_dict)
        # Call param dict sanitizer, before non-job params are added, as we don't want to sanitize filenames.
        self.__sanitize_param_dict(param_dict)
        # Parameters added after this line are not sanitized
        self.__populate_non_job_params(param_dict)

        # Return the dictionary of parameters
        return param_dict

    def __walk_inputs(self, inputs, input_values, func):

        def do_walk(inputs, input_values):
            """
            Wraps parameters as necessary.
            """
            for input in inputs.values():
                if isinstance(input, Repeat):
                    for d in input_values[input.name]:
                        do_walk(input.inputs, d)
                elif isinstance(input, Conditional):
                    values = input_values[input.name]
                    current = values["__current_case__"]
                    do_walk(input.cases[current].inputs, values)
                elif isinstance(input, Section):
                    values = input_values[input.name]
                    do_walk(input.inputs, values)
                else:
                    func(input_values, input)

        do_walk(inputs, input_values)
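    # __walk_inputs mirrors the tool's nested input structure. For a
    # hypothetical conditional such as
    #
    #     <conditional name="mode">
    #         <param name="mode_select" type="select">...</param>
    #         <when value="advanced"><param name="threshold" ... /></when>
    #     </conditional>
    #
    # input_values holds {"mode": {"__current_case__": 1, "threshold": ...}}
    # and do_walk() descends only into the case selected by
    # "__current_case__". (Illustrative XML and values, not from a real tool.)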
    def __populate_wrappers(self, param_dict, input_dataset_paths):

        def wrap_input(input_values, input):
            if isinstance(input, DataToolParameter) and input.multiple:
                value = input_values[input.name]
                dataset_instances = DatasetListWrapper.to_dataset_instances(value)
                input_values[input.name] = \
                    DatasetListWrapper(dataset_instances,
                                       dataset_paths=input_dataset_paths,
                                       datatypes_registry=self.app.datatypes_registry,
                                       tool=self.tool,
                                       name=input.name)
            elif isinstance(input, DataToolParameter):
                # FIXME: We're populating param_dict with conversions when
                #        wrapping values, this should happen as a separate
                #        step before wrapping (or call this wrapping step
                #        something more generic) (but iterating this same
                #        list twice would be wasteful)
                # Add explicit conversions by name to current parent
                for conversion_name, conversion_extensions, conversion_datatypes in input.conversions:
                    # If we are at the cmdline-building step, then converters
                    # have already executed
                    conv_ext, converted_dataset = input_values[input.name].find_conversion_destination(conversion_datatypes)
                    # When dealing with optional inputs, we'll provide a
                    # valid extension to be used for None converted dataset
                    if not conv_ext:
                        conv_ext = conversion_extensions[0]
                    # input_values[ input.name ] is None when optional
                    # dataset, 'conversion' of optional dataset should
                    # create wrapper around NoneDataset for converter output
                    if input_values[input.name] and not converted_dataset:
                        # Input that converter is based from has a value,
                        # but converted dataset does not exist
                        raise Exception('A path for explicit datatype conversion has not been found: %s --/--> %s'
                                        % (input_values[input.name].extension, conversion_extensions))
                    else:
                        # Trick wrapper into using target conv ext (when
                        # None) without actually being a tool parameter
                        input_values[conversion_name] = \
                            DatasetFilenameWrapper(converted_dataset,
                                                   datatypes_registry=self.app.datatypes_registry,
                                                   tool=Bunch(conversion_name=Bunch(extensions=conv_ext)),
                                                   name=conversion_name)
                # Wrap actual input dataset
                dataset = input_values[input.name]
                wrapper_kwds = dict(
                    datatypes_registry=self.app.datatypes_registry,
                    tool=self,
                    name=input.name
                )
                identifier_key = "%s|__identifier__" % input.name
                if identifier_key in param_dict:
                    wrapper_kwds["identifier"] = param_dict[identifier_key]
                if dataset:
                    # A None dataset does not have a filename
                    real_path = dataset.file_name
                    if real_path in input_dataset_paths:
                        wrapper_kwds["dataset_path"] = input_dataset_paths[real_path]
                input_values[input.name] = DatasetFilenameWrapper(dataset, **wrapper_kwds)
            elif isinstance(input, DataCollectionToolParameter):
                dataset_collection = input_values[input.name]
                wrapper_kwds = dict(
                    datatypes_registry=self.app.datatypes_registry,
                    dataset_paths=input_dataset_paths,
                    tool=self,
                    name=input.name
                )
                wrapper = DatasetCollectionWrapper(dataset_collection, **wrapper_kwds)
                input_values[input.name] = wrapper
            elif isinstance(input, SelectToolParameter):
                input_values[input.name] = SelectToolParameterWrapper(
                    input, input_values[input.name], self.app, other_values=param_dict,
                    path_rewriter=self.unstructured_path_rewriter)
            else:
                input_values[input.name] = InputValueWrapper(
                    input, input_values[input.name], param_dict)

        # HACK: only wrap if check_values is not false, this deals with external
        #       tools where the inputs don't even get passed through. These
        #       tools (e.g. UCSC) should really be handled in a special way.
        if self.tool.check_values:
            self.__walk_inputs(self.tool.inputs, param_dict, wrap_input)
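    # From a Cheetah template the wrappers installed above stand in for the
    # raw values: a hypothetical fragment such as
    #
    #     some_tool --in '$input1' --format '$input1.ext'
    #
    # renders the dataset's filename for the bare name while still exposing
    # the dataset's attributes through the wrapper. (Illustrative template
    # only; '$input1' is an assumed parameter name.)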
    def __populate_input_dataset_wrappers(self, param_dict, input_datasets, input_dataset_paths):
        # TODO: Update this method for dataset collections? Need to test. -John.

        # FIXME: when self.check_values==True, input datasets are being wrapped
        #        twice (above and below, creating 2 separate
        #        DatasetFilenameWrapper objects - first is overwritten by
        #        second), is this necessary? - if we get rid of this way to
        #        access children, can we stop this redundancy, or is there
        #        another reason for this?
        #        - Only necessary when self.check_values is False (==external dataset
        #          tool?: can this be abstracted out as part of being a datasource tool?)
        #        - But we still want (ALWAYS) to wrap input datasets (this should be
        #          checked to prevent overhead of creating a new object?)
        # Additionally, datasets go in the param dict. We wrap them such that
        # if the bare variable name is used it returns the filename (for
        # backwards compatibility). We also add any child datasets to the
        # param dict encoded as:
        #   "_CHILD___{dataset_name}___{child_designation}",
        # but this should be considered DEPRECATED, instead use:
        #   $dataset.get_child( 'name' ).filename
        for name, data in input_datasets.items():
            param_dict_value = param_dict.get(name, None)
            if not isinstance(param_dict_value, (DatasetFilenameWrapper, DatasetListWrapper)):
                wrapper_kwds = dict(
                    datatypes_registry=self.app.datatypes_registry,
                    tool=self,
                    name=name,
                )
                if data:
                    real_path = data.file_name
                    if real_path in input_dataset_paths:
                        dataset_path = input_dataset_paths[real_path]
                        wrapper_kwds['dataset_path'] = dataset_path
                param_dict[name] = DatasetFilenameWrapper(data, **wrapper_kwds)
            if data:
                for child in data.children:
                    param_dict["_CHILD___%s___%s" % (name, child.designation)] = DatasetFilenameWrapper(child)

    def __populate_output_collection_wrappers(self, param_dict, output_collections, output_paths, job_working_directory):
        output_dataset_paths = dataset_path_rewrites(output_paths)
        tool = self.tool
        for name, out_collection in output_collections.items():
            if name not in tool.output_collections:
                continue
                # message_template = "Name [%s] not found in tool.output_collections %s"
                # message = message_template % ( name, tool.output_collections )
                # raise AssertionError( message )

            wrapper_kwds = dict(
                datatypes_registry=self.app.datatypes_registry,
                dataset_paths=output_dataset_paths,
                tool=tool,
                name=name
            )
            wrapper = DatasetCollectionWrapper(out_collection, **wrapper_kwds)
            param_dict[name] = wrapper
            # TODO: Handle nested collections...
            output_def = tool.output_collections[name]
            for element_identifier, output_def in output_def.outputs.items():
                if not output_def.implicit:
                    dataset_wrapper = wrapper[element_identifier]
                    param_dict[output_def.name] = dataset_wrapper
                    log.info("Updating param_dict for %s with %s" % (output_def.name, dataset_wrapper))

    def __populate_output_dataset_wrappers(self, param_dict, output_datasets, output_paths, job_working_directory):
        output_dataset_paths = dataset_path_rewrites(output_paths)
        for name, hda in output_datasets.items():
            # Write outputs to the working directory (for security purposes)
            # if desired.
            real_path = hda.file_name
            if real_path in output_dataset_paths:
                dataset_path = output_dataset_paths[real_path]
                param_dict[name] = DatasetFilenameWrapper(hda, dataset_path=dataset_path)
                try:
                    open(dataset_path.false_path, 'w').close()
                except EnvironmentError:
                    pass  # May well not exist - e.g. Pulsar.
            else:
                param_dict[name] = DatasetFilenameWrapper(hda)
            # Provide access to a path to store additional files
            # TODO: path munging for cluster/dataset server relocatability
            param_dict[name].files_path = os.path.abspath(
                os.path.join(job_working_directory, "dataset_%s_files" % (hda.dataset.id)))
            for child in hda.children:
                param_dict["_CHILD___%s___%s" % (name, child.designation)] = DatasetFilenameWrapper(child)
        for out_name, output in self.tool.outputs.items():
            if out_name not in param_dict and output.filters:
                # Assume the reason we lack this output is because a filter
                # failed to pass; for tool writing convenience, provide a
                # NoneDataset
                ext = getattr(output, "format", None)  # populate only for output datasets (not collections)
                param_dict[out_name] = NoneDataset(datatypes_registry=self.app.datatypes_registry, ext=ext)
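    # Two notes on values installed above and below (illustrative names only):
    # - Each output's extra-files location is exposed to templates as
    #   e.g. $output.files_path, resolving to
    #   <job_working_directory>/dataset_<id>_files.
    # - __populate_non_job_params (below) registers
    #   $__get_data_table_entry__, so a template can look up locally cached
    #   reference data, e.g. with a hypothetical table and columns:
    #       #set $index_path = $__get_data_table_entry__('bwa_indexes', 'value', $genome, 'path')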
    def __populate_non_job_params(self, param_dict):
        # -- Add useful attributes/functions for use in creating command line.

        # Function for querying a data table.
        def get_data_table_entry(table_name, query_attr, query_val, return_attr):
            """
            Queries and returns an entry in a data table.
            """
            if table_name in self.app.tool_data_tables:
                return self.app.tool_data_tables[table_name].get_entry(query_attr, query_val, return_attr)

        param_dict['__tool_directory__'] = self.compute_environment.tool_directory()
        param_dict['__get_data_table_entry__'] = get_data_table_entry

        # We add access to app here, this allows access to app.config, etc
        param_dict['__app__'] = RawObjectWrapper(self.app)
        # More convenient access to app.config.new_file_path; we don't need to
        # wrap a string, but this method of generating additional datasets
        # should be considered DEPRECATED
        param_dict['__new_file_path__'] = self.compute_environment.new_file_path()
        # The following points to location (xxx.loc) files which are pointers
        # to locally cached data
        param_dict['__tool_data_path__'] = param_dict['GALAXY_DATA_INDEX_DIR'] = self.app.config.tool_data_path
        # For the upload tool, we need to know the root directory and the
        # datatypes conf path, so we can load the datatypes registry
        param_dict['__root_dir__'] = param_dict['GALAXY_ROOT_DIR'] = os.path.abspath(self.app.config.root)
        param_dict['__datatypes_config__'] = param_dict['GALAXY_DATATYPES_CONF_FILE'] = self.app.datatypes_registry.integrated_datatypes_configs
        param_dict['__admin_users__'] = self.app.config.admin_users
        param_dict['__user__'] = RawObjectWrapper(param_dict.get('__user__', None))

    def __populate_unstructured_path_rewrites(self, param_dict):

        def rewrite_unstructured_paths(input_values, input):
            if isinstance(input, SelectToolParameter):
                input_values[input.name] = SelectToolParameterWrapper(
                    input, input_values[input.name], self.app, other_values=param_dict,
                    path_rewriter=self.unstructured_path_rewriter)

        if not self.tool.check_values and self.unstructured_path_rewriter:
            # The tools weren't "wrapped" yet, but need to be in order to get
            # the paths rewritten.
            self.__walk_inputs(self.tool.inputs, param_dict, rewrite_unstructured_paths)

    def __sanitize_param_dict(self, param_dict):
        """
        Sanitize all values that will be substituted on the command line,
        with the exception of ToolParameterValueWrappers, which already have
        their own specific sanitization rules and also exclude special-cased
        named values.

        We will only examine the first level for values to skip; the wrapping
        function will recurse as necessary.

        Note: this method follows the style of the similar populate calls, in
        that param_dict is modified in-place.
        """
        # chromInfo is a filename, do not sanitize it.
        skip = ['chromInfo'] + list(self.tool.template_macro_params.keys())
        if not self.tool or not self.tool.options or self.tool.options.sanitize:
            # Iterate over a copy since keys are removed and re-added wrapped.
            for key, value in list(param_dict.items()):
                if key not in skip:
                    # Remove key so that new wrapped object will occupy key slot
                    del param_dict[key]
                    # And replace with new wrapped key
                    param_dict[wrap_with_safe_string(key, no_wrap_classes=ToolParameterValueWrapper)] = \
                        wrap_with_safe_string(value, no_wrap_classes=ToolParameterValueWrapper)

    def build(self):
        """
        Build runtime description of job to execute, evaluate command and
        config templates corresponding to this tool with these inputs on this
        compute environment.
        """
        self.extra_filenames = []
        self.command_line = None

        try:
            self.__build_config_files()
        except Exception as e:
            # capture and log parsing errors
            global_tool_errors.add_error(self.tool.config_file, "Building Config Files", e)
            raise e
        try:
            self.__build_param_file()
        except Exception as e:
            # capture and log parsing errors
            global_tool_errors.add_error(self.tool.config_file, "Building Param File", e)
            raise e
        try:
            self.__build_command_line()
        except Exception as e:
            # capture and log parsing errors
            global_tool_errors.add_error(self.tool.config_file, "Building Command Line", e)
            raise e
        try:
            self.__build_environment_variables()
        except Exception as e:
            global_tool_errors.add_error(self.tool.config_file, "Building Environment Variables", e)
            raise e

        return self.command_line, self.extra_filenames, self.environment_variables

    def __build_command_line(self):
        """
        Build command line to invoke this tool given a populated param_dict
        """
        command = self.tool.command
        param_dict = self.param_dict