def _special_case(param_dict, tool_id, user_id, user_roles):
    """ Tools to block before starting to run """
    if get_tool_id(tool_id).startswith('interactive_tool_') and user_id == -1:
        raise JobMappingException(
            "This tool is restricted to registered users, "
            "please contact a site administrator")

    if get_tool_id(tool_id).startswith('interactive_tool_ml') and 'interactive-tool-ml-jupyter-notebook' not in user_roles:
        raise JobMappingException(
            "This tool is restricted to authorized users, "
            "please contact a site administrator")

    if get_tool_id(tool_id).startswith('gmx_sim'):
        md_steps_limit = 1000000
        if 'md_steps' in param_dict['sets']['mdp']:
            if param_dict['sets']['mdp']['md_steps'] > md_steps_limit and 'gmx_sim_powerusers' not in user_roles:
                raise JobMappingException(
                    "This tool's configuration has exceeded a computational limit, "
                    "please contact a site administrator")

    return

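# Note: the rules in this file call a module-level `get_tool_id` helper that is not
# shown here. The sketch below is an assumption based on the `tool.id.split('/')[-2]`
# pattern the other rules use to shorten a tool shed id; it is illustrative only,
# not the actual implementation.
def get_tool_id(tool_id):
    # e.g. 'toolshed.g2.bx.psu.edu/repos/owner/repo/mytool/1.0' -> 'mytool'
    if '/' in tool_id:
        return tool_id.split('/')[-2]
    return tool_id
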
def dynamic_greenfield_select(app, tool, job, user_email):
    destination = None
    tool_id = tool.id
    if '/' in tool.id:
        # extract short tool id from tool shed id
        tool_id = tool.id.split('/')[-2]
    if user_email is None:
        raise JobMappingException('Please log in to use this tool.')

    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)

    if '__job_resource' in param_dict and param_dict['__job_resource']['__job_resource__select'] == 'yes':
        resource_key = None
        for resource_key in param_dict['__job_resource'].keys():
            if resource_key in RESOURCE_KEYS:
                break
        else:
            log.warning(
                '(%s) Greenfield dynamic plugin did not find a valid resource key, keys were: %s',
                job.id, param_dict['__job_resource'].keys())
            raise JobMappingException(FAILURE_MESSAGE)

        destination_id = param_dict['__job_resource'][resource_key]
        if destination_id not in VALID_DESTINATIONS:
            log.warning(
                '(%s) Greenfield dynamic plugin got an invalid destination: %s',
                job.id, destination_id)
            raise JobMappingException(FAILURE_MESSAGE)

        if destination_id == GREENFIELD_NORMAL_DESTINATION:
            destination = app.job_config.get_destination(GREENFIELD_NORMAL_DESTINATION)
        elif destination_id == GREENFIELD_DEVELOPMENT_DESTINATION:
            destination = app.job_config.get_destination(GREENFIELD_DEVELOPMENT_DESTINATION)
    else:
        # default to 15 cpus in the regular queue
        destination = app.job_config.get_destination(GREENFIELD_NORMAL_DESTINATION)

    if destination is None:
        log.error('(%s) greenfield_select dynamic plugin did not set a destination', job.id)
        raise JobMappingException(FAILURE_MESSAGE)

    log.debug("(%s) greenfield_select dynamic plugin returning '%s' destination", job.id, destination.id)
    log.debug("     submit_native_specification is: %s", destination.params['submit_native_specification'])
    return destination

def gateway(tool_id, user, memory_scale=1.0, next_dest=None):
    # And run it.
    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        user_preferences = user.extra_preferences
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        user_preferences = []
        email = ''
        user_id = -1

    if get_tool_id(tool_id).startswith('interactive_tool_') and user_id == -1:
        raise JobMappingException(
            "This tool is restricted to registered users, "
            "please contact a site administrator")

    if get_tool_id(tool_id).startswith('interactive_tool_ml') and 'interactive-tool-ml-jupyter-notebook' not in user_roles:
        raise JobMappingException(
            "This tool is restricted to authorized users, "
            "please contact a site administrator")

    try:
        env, params, runner, spec, tags = _gateway(
            tool_id, user_preferences, user_roles, user_id, email,
            memory_scale=memory_scale)
    except Exception as e:
        return JobMappingException(str(e))

    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]

    name = name_it(spec)
    return JobDestination(
        id=name,
        tags=tags,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )

def single_dynamic_memory(app, tool, job):
    inp_data = dict([(da.name, da.dataset) for da in job.input_datasets])
    inp_data.update([(da.name, da.dataset) for da in job.input_library_datasets])

    tool_id = tool.id
    if '/' in tool.id:
        tool_id = tool.id.split('/')[-2]

    if tool_id == 'wig_to_bigWig':
        memfactor = 2.75
    elif tool_id == 'bed_to_bigBed':
        memfactor = 0.5
    else:
        log.warning("(%s) single_dynamic_memory plugin got invalid tool id: %s", job.id, tool_id)
        raise JobMappingException(FAILURE_MESSAGE)

    input_mb = int(inp_data["input1"].get_size()) / 1024 / 1024
    required_mb = int(input_mb * memfactor)
    destination = app.job_config.get_destination(DESTINATION)

    if required_mb < MEM_DEFAULT:
        log.debug(
            "(%s) single_dynamic_memory plugin sending %s job (input1: %s MB) to rodeo with default (%s MB) mem-per-cpu (requires: %s MB)",
            job.id, tool_id, input_mb, MEM_DEFAULT, required_mb)
        destination.params['nativeSpecification'] += RODEO_PARAMS
    elif required_mb < RODEO_MAX_MEM:
        log.debug(
            "(%s) single_dynamic_memory plugin sending %s job (input1: %s MB) to rodeo with --mem-per-cpu=%s MB",
            job.id, tool_id, input_mb, required_mb)
        destination.params['nativeSpecification'] += RODEO_PARAMS + ' --mem-per-cpu=%s' % required_mb
    elif required_mb < ROUNDUP_MAX_MEM:
        log.debug(
            "(%s) single_dynamic_memory plugin sending %s job (input1: %s MB) to roundup with --mem-per-cpu=%s MB",
            job.id, tool_id, input_mb, required_mb)
        destination.params['nativeSpecification'] += ROUNDUP_PARAMS + ' --mem-per-cpu=%s' % required_mb
    else:
        log.warning(
            "(%s) single_dynamic_memory plugin cannot run %s job (input1: %s MB) requiring %s MB",
            job.id, tool_id, input_mb, required_mb)
        raise JobMappingException(SIZE_FAILURE_MESSAGE)

    log.debug("(%s) single_dynamic_memory dynamic plugin returning '%s' destination", job.id, DESTINATION)
    log.debug("     nativeSpecification is: %s", destination.params['nativeSpecification'])
    return destination

def __jetstream_rule(app, job, user_email, rule):
    destination = None
    destination_id = None
    if user_email is None:
        raise JobMappingException('Please log in to use this tool.')

    clusters = ','.join(JETSTREAM_DESTINATIONS[rule]['clusters'])
    native_specification = app.job_config.get_destination(rule).params.get('native_specification', '')
    sbatch_test_cmd = ['sbatch', '--test-only', '--clusters=%s' % clusters] + native_specification.split() + [TEST_SCRIPT]
    try:
        p = subprocess.Popen(sbatch_test_cmd, stderr=subprocess.PIPE)
        stderr = p.stderr.read()
        p.wait()
        assert p.returncode == 0, stderr
    except:
        log.exception('Error running sbatch test')
        raise JobMappingException(
            'An error occurred while trying to schedule this job. Please retry it and if it continues to fail, report it to an administrator using the bug icon.')

    # There is a race condition here, of course. But I don't have a better solution.
    node = stderr.split()[-1]
    for i, prefix in enumerate(JETSTREAM_DESTINATIONS[rule]['cluster_prefixes']):
        if node.startswith(prefix):
            cluster = JETSTREAM_DESTINATIONS[rule]['clusters'][i]
            break
    else:
        log.error("Could not determine the cluster of node '%s', clusters are: '%s'", node, clusters)
        raise JobMappingException(
            'An error occurred while trying to schedule this job. Please retry it and if it continues to fail, report it to an administrator using the bug icon.')

    destination_id = '%s_%s' % (cluster.replace('-', '_'), JETSTREAM_DESTINATIONS[rule]['partition'])
    destination = app.job_config.get_destination(destination_id)

    log.debug("(%s) Jetstream dynamic plugin '%s' returning '%s' destination", job.id, rule, destination_id)
    if destination is not None and 'nativeSpecification' in destination.params:
        log.debug("     nativeSpecification is: %s", destination.params['nativeSpecification'])
    return destination or destination_id

def gateway(tool_id, user, memory_scale=1.0, next_dest=None):
    # And run it.
    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        email = ''
        user_id = -1

    try:
        env, params, runner, spec = _gateway(tool_id, user_roles, user_id, email, memory_scale=memory_scale)
    except Exception as e:
        return JobMappingException(str(e))

    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]

    name = name_it(spec)
    return JobDestination(
        id=name,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )

def dynamic_cores_time(app, tool, job, user_email):
    destination = None
    destination_id = 'slurm'

    # build the param dictionary
    param_dict = job.get_param_values(app)

    if param_dict.get('__job_resource_select') != 'yes':
        log.info("Job resource parameters not selected, returning default destination")
        return destination_id

    # handle job resource parameters
    try:
        # validate params
        cores = int(param_dict['__job_resource']['cores'])
        time = int(param_dict['__job_resource']['time'])
        destination_id = DESTINATION_IDS[cores]
        destination = app.job_config.get_destination(destination_id)
        # set walltime
        if 'nativeSpecification' not in destination.params:
            destination.params['nativeSpecification'] = ''
        destination.params['nativeSpecification'] += ' --time=%s:00:00' % time
    except:
        # resource param selector not sent with tool form, job_conf.xml misconfigured
        log.warning('(%s) error, keys were: %s', job.id, param_dict.keys())
        raise JobMappingException(FAILURE_MESSAGE)

    log.info('returning destination: %s', destination_id)
    # log.info('native specification: %s', destination.params.get('nativeSpecification'))
    return destination or destination_id

def __resource(params, key, valid):
    # resource_params is guaranteed to be a dict
    resource = params.get(key)
    if resource and resource not in valid:
        # no job object is in scope here, so log the offending key/value only
        log.warning("dynamic rule got an invalid resource for '%s': %s", key, resource)
        raise JobMappingException('An invalid resource was selected')
    return resource

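# Hedged usage sketch (not part of the original file): how a calling rule might use
# __resource to validate a user-selected resource parameter. The key name
# 'compute_resource' and the valid-values tuple below are illustrative assumptions.
#
#   destination_id = __resource(resource_params, 'compute_resource',
#                               ('cluster_normal', 'cluster_development'))
#   if destination_id is None:
#       destination_id = DEFAULT_DESTINATION  # fall back to the rule's default
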
def gateway_for_keras_train_eval(app, job, tool, user, next_dest=None):
    """
    Type of compute resource (CPU or GPU) for the keras_train_eval tool
    depends on the user's input from its wrapper. The default resource is CPU.
    """
    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)
    tool_id = tool.id

    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        user_preferences = user.extra_preferences
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        user_preferences = []
        email = ''
        user_id = -1

    # get default job destination parameters
    try:
        env, params, runner, spec, tags = _gateway(tool_id, user_preferences, user_roles, user_id, email)
    except Exception as e:
        return JobMappingException(str(e))

    # set up to resubmit job in case of failure
    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]
    name = name_it(spec)

    # assign dynamic runner based on user's input from tool wrapper
    if '__job_resource' in param_dict:
        if 'gpu' in param_dict['__job_resource']:
            if param_dict['__job_resource']['gpu'] == '1':
                params['requirements'] = 'GalaxyGroup == "compute_gpu"'
                params['request_gpus'] = 1
                # env.append({'name': 'GPU_AVAILABLE', 'value': '1'})

    # create dynamic destination rule
    return JobDestination(
        id=name,
        tags=tags,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )

def gateway_for_hifism(app, job, tool, user, memory_scale=1.0, next_dest=None):
    """
    The memory requirement of Hifiasm depends on a wrapper's input
    """
    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)
    tool_id = tool.id

    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        user_preferences = user.extra_preferences
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        user_preferences = []
        email = ''
        user_id = -1

    try:
        env, params, runner, spec, tags = _gateway(
            tool_id, user_preferences, user_roles, user_id, email,
            ft=FAST_TURNAROUND, special_tools=SPECIAL_TOOLS,
            memory_scale=memory_scale)
    except Exception as e:
        return JobMappingException(str(e))

    # cap the computed memory request at the runner's limit
    limits = _get_limits(runner)
    request_memory = str(min(_compute_memory_for_hifiasm(param_dict), limits.get('mem'))) + 'G'
    params['request_memory'] = request_memory

    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]

    name = name_it(spec)
    return JobDestination(
        id=name,
        tags=tags,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )

def cluster_heavy(user, tool):
    # user is a galaxy.model.User object or None
    # tool is a galaxy.tools.Tool object
    if user is None:
        raise JobMappingException('You must log in to use this tool!')
    else:
        required_role = "IBPS"
        final_destination = "cluster_heavy_ibps"
        # Check that the required_role is in the set of role names associated with the user
        user_roles = user.all_roles()  # a list of galaxy.model.Role objects
        user_in_role = required_role in [role.name for role in user_roles]
        if not user_in_role:
            return "cluster_heavy"
        else:
            return final_destination

def __parse_resource_selector(param_dict):
    # handle job resource parameters
    try:
        # validate params
        cores = int(param_dict['__job_resource']['cores'])
        time = int(param_dict['__job_resource']['time'])
        destination_id = param_dict['__job_resource']['tacc_compute_resource_advanced']
        assert destination_id in VALID_DESTINATIONS
        return (cores, time, destination_id)
    except:
        # resource param selector not sent with tool form, job_conf.xml misconfigured
        # (no job object is in scope here, so only the param keys are logged)
        log.exception('job resource error, keys were: %s', param_dict.keys())
        raise JobMappingException(FAILURE_MESSAGE)

def dynamic_rnastar(app, tool, job, user_email):
    tool_id = tool.id
    if '/' in tool.id:
        # extract short tool id from tool shed id
        tool_id = tool.id.split('/')[-2]

    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)

    destination_id = None
    try:
        destination_id, mem_mb = _rnastar(app, param_dict, destination_id, False, job.id)
    except Exception:
        log.exception('(%s) Error determining parameters for STAR job', job.id)
        raise JobMappingException(FAILURE_MESSAGE)

    log.debug("(%s) STAR dynamic plugin returning '%s' destination", job.id, destination_id)
    return destination_id

def gateway(tool_id, user, memory_scale=1.0, next_dest=None):
    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        user_preferences = user.extra_preferences
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        user_preferences = []
        email = ''
        user_id = -1

    try:
        env, params, runner, spec, tags = _gateway(
            tool_id, user_preferences, user_roles, user_id, email,
            ft=FAST_TURNAROUND, special_tools=SPECIAL_TOOLS,
            memory_scale=memory_scale)
    except Exception as e:
        return JobMappingException(str(e))

    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]

    name = name_it(spec)
    return JobDestination(
        id=name,
        tags=tags,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )

def job_router(app, job, tool, resource_params, user):
    tool_mapping = None
    envs = []
    spec = {}
    login_required = False
    destination_id = None
    destination = None
    container_override = None

    if JOB_ROUTER_CONF_FILE is None:
        __set_job_router_conf_file_path(app)

    # build the param dictionary
    param_dict = job.get_param_values(app)

    local.log = JobLogger(log, {'job_id': job.id})
    local.log.debug("param dict for execution of tool '%s': %s", tool.id, param_dict)

    # find any mapping for this tool and params
    # tool_mapping = an item in tools[tool_id] in job_router_conf yaml
    tool_mapping = __tool_mapping(app, tool.id, param_dict)
    if tool_mapping:
        spec = tool_mapping.get('spec', {}).copy()
        envs = tool_mapping.get('env', []).copy()
        login_required = tool_mapping.get('login_required', False)
        container_override = tool_mapping.get('container_override', None)

    tool_id = __short_tool_id(tool.id)

    if login_required and user is None:
        raise JobMappingException('Please log in to use this tool')

    user_email = None if user is None else user.email

    # resource_params is an empty dict if not set
    if resource_params:
        local.log.debug("Job resource parameters selected: %s", resource_params)
        destination_id, user_spec = __parse_resource_selector(app, job, user_email, resource_params)
        if spec and user_spec:
            local.log.debug(
                "Mapped spec for tool '%s' was (prior to resource param selection): %s",
                tool_id, spec)
        spec.update(user_spec)
        local.log.debug("Spec for tool '%s' after resource param selection: %s", tool_id, spec or 'none')
    elif (__is_training_history(job, tool_id) or __user_in_training(app, user)) and __is_training_compatible_tool(tool_id):
        destination_id = __training_tool_mapping(tool_id)
        local.log.info("User %s is in a training, mapped to destination: %s", user_email, destination_id)
        # bypass any group mappings and just pick a destination if the supplied dest is a list
        destination_id = __resolve_destination_list(app, job, destination_id)
    elif tool_mapping and tool_mapping.get('destination'):
        destination_id = tool_mapping['destination']
        destination_id = __resolve_destination(app, job, user_email, destination_id)
        local.log.debug(
            "Tool '%s' mapped to '%s' native specification overrides: %s",
            tool_id, destination_id, spec or 'none')

    if destination_id is None:
        if __is_galaxy_lib_tool(tool_id):
            # TODO: should this be a mapping or something? e.g. s/$/_galaxy_env/ so that
            # their regular tool mapping (16 GB or whatever) still applies
            tool_mapping = __tool_mapping(app, '_galaxy_lib_', {})
            destination_id = tool_mapping['destination']
            local.log.debug("'%s' is a Galaxy lib tool, using destination '%s'", tool_id, destination_id)
        else:
            tool_mapping = __tool_mapping(app, '_default_', {})
            destination_id = tool_mapping['destination']
            local.log.debug("'%s' has no destination mapping, using default destination '%s'", tool_id, destination_id)
        destination_id = __resolve_destination(app, job, user_email, destination_id)

    local.log.debug('Final destination after resolution is: %s', destination_id)

    destination = app.job_config.get_destination(destination_id)

    # TODO: requires native spec to be set on all dests, you could do this by plugin instead
    native_spec_param = __native_spec_param(destination)
    if native_spec_param:
        native_spec = destination.params.get(native_spec_param, '')
        native_spec = __update_native_spec(destination_id, spec, native_spec)
        destination.params[native_spec_param] = native_spec
    elif spec:
        local.log.warning(
            "Could not determine native spec param for destination '%s', spec will not be applied: %s",
            destination.id, destination.params)

    __update_env(destination, envs)

    if container_override:
        destination.params['container_override'] = container_override
        local.log.debug("Container override from tool mapping: %s", container_override)

    local.log.info('Returning destination: %s', destination_id)
    local.log.info('Native specification: %s', destination.params.get(native_spec_param))
    return destination

def __rule(app, tool, job, user_email, resource):
    destination = None
    destination_id = None
    default_destination_id = RESOURCES[resource][0]
    tool_id = tool.id

    if '/' in tool.id:
        # extract short tool id from tool shed id
        tool_id = tool.id.split('/')[-2]

    if user_email is None:
        raise JobMappingException('Please log in to use this tool.')

    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)

    # Explicitly set the destination if the user has chosen to do so with the resource selector
    if '__job_resource' in param_dict:
        if param_dict['__job_resource']['__job_resource__select'] == 'yes':
            resource_key = None
            for resource_key in param_dict['__job_resource'].keys():
                if resource_key == resource:
                    destination_id = param_dict['__job_resource'][resource_key]
                    if destination_id in RESOURCES[resource_key]:
                        break
                    elif destination_id == TEAM_DESTINATION:
                        break
                    else:
                        log.warning('(%s) Destination/walltime dynamic plugin got an invalid destination: %s', job.id, destination_id)
                        raise JobMappingException(FAILURE_MESSAGE)
            else:
                log.warning(
                    '(%s) Destination/walltime dynamic plugin got an invalid value for selector: %s',
                    job.id, param_dict['__job_resource']['__job_resource__select'])
                raise JobMappingException(FAILURE_MESSAGE)
        elif param_dict['__job_resource']['__job_resource__select'] == 'no':
            # job will be sent to the default
            if user_email.lower() in NORM_RESERVED_USERS:
                log.info("(%s) Destination/walltime dynamic plugin returning default reserved destination for '%s'", job.id, user_email)
                return RESERVED_DESTINATION
        else:
            log.warning(
                '(%s) Destination/walltime dynamic plugin did not find a valid resource key, keys were: %s',
                job.id, param_dict['__job_resource'].keys())
            raise JobMappingException(FAILURE_MESSAGE)
    else:
        log.warning(
            '(%s) Destination/walltime dynamic plugin did not receive the __job_resource param, keys were: %s',
            job.id, param_dict.keys())
        raise JobMappingException(FAILURE_MESSAGE)

    if destination_id == TEAM_DESTINATION:
        if user_email in TEAM_USERS:
            destination_id = TEAM_DESTINATION
            destination = app.job_config.get_destination(TEAM_DESTINATION)
            destination.params['nativeSpecification'] += ' --ntasks=%s' % param_dict['__job_resource']['team_cpus']
        else:
            log.warning("(%s) Unauthorized user '%s' selected team development destination", job.id, user_email)
            destination_id = LOCAL_DESTINATION

    # Only allow stampede if a cached reference is selected
    if destination_id in STAMPEDE_DESTINATIONS and tool_id in PUNT_TOOLS:
        for p in GENOME_SOURCE_PARAMS:
            subpd = param_dict.copy()
            # walk the param dict
            try:
                for i in p.split('.'):
                    subpd = subpd[i]
                assert subpd in GENOME_SOURCE_VALUES
                log.info('(%s) Destination/walltime dynamic plugin detected indexed reference selected, job will be sent to Stampede', job.id)
                break
            except:
                pass
        else:
            log.info('(%s) User requested Stampede but destination/walltime dynamic plugin did not detect selection of an indexed reference, job will be sent to local cluster instead', job.id)
            if destination_id == STAMPEDE_DEVELOPMENT_DESTINATION:
                destination_id = LOCAL_DEVELOPMENT_DESTINATION
            else:
                destination_id = default_destination_id

    # Some tools do not react well to Jetstream
    if destination_id is None and tool_id not in JETSTREAM_TOOLS:
        log.info('(%s) Default destination requested and tool is not in Jetstream-approved list, job will be sent to local cluster', job.id)
        destination_id = default_destination_id

    # Only allow jetstream if a cached reference is not selected
    #if destination_id in JETSTREAM_DESTINATIONS + (None,) and tool_id in PUNT_TOOLS:
    #    for p in GENOME_SOURCE_PARAMS:
    #        subpd = param_dict.copy()
    #        # walk the param dict
    #        try:
    #            for i in p.split('.'):
    #                subpd = subpd[i]
    #            assert subpd not in GENOME_SOURCE_VALUES
    #            log.info('(%s) Destination/walltime dynamic plugin detected history reference selected, job will be allowed on Jetstream', job.id)
    #            break
    #        except:
    #            pass
    #    else:
    #        log.info('(%s) User requested Jetstream or default but destination/walltime dynamic plugin did not detect selection of a history reference, job will be sent to local cluster instead', job.id)
    #        destination_id = default_destination_id

    # Need to explicitly pick a destination because of staging. Otherwise we
    # could just submit with --clusters=a,b,c and let slurm sort it out
    if destination_id in JETSTREAM_DESTINATIONS + (None,) and default_destination_id == LOCAL_DESTINATION:
        test_destination_id = destination_id or default_destination_id
        clusters = ','.join(JETSTREAM_DESTINATION_MAPS[test_destination_id]['clusters'])
        native_specification = app.job_config.get_destination(test_destination_id).params.get('nativeSpecification', '')
        sbatch_test_cmd = ['sbatch', '--test-only', '--clusters=%s' % clusters] + native_specification.split() + [TEST_SCRIPT]
        log.debug('Testing job submission to determine suitable cluster: %s', ' '.join(sbatch_test_cmd))
        try:
            p = subprocess.Popen(sbatch_test_cmd, stderr=subprocess.PIPE)
            stderr = p.stderr.read()
            p.wait()
            assert p.returncode == 0, stderr
        except:
            log.exception('Error running sbatch test')
            raise JobMappingException(
                'An error occurred while trying to schedule this job. Please retry it and if it continues to fail, report it to an administrator using the bug icon.')

        # There is a race condition here, of course. But I don't have a better solution.
        node = stderr.split()[-1]
        for i, prefix in enumerate(JETSTREAM_DESTINATION_MAPS[test_destination_id]['cluster_prefixes']):
            if node.startswith(prefix):
                cluster = JETSTREAM_DESTINATION_MAPS[test_destination_id]['clusters'][i]
                destination_prefix = JETSTREAM_DESTINATION_MAPS[test_destination_id]['destination_prefixes'][i]
                break
        else:
            log.error("Could not determine the cluster of node '%s', clusters are: '%s'", node, clusters)
            raise JobMappingException(
                'An error occurred while trying to schedule this job. Please retry it and if it continues to fail, report it to an administrator using the bug icon.')

        destination_id = '%s_%s' % (destination_prefix, JETSTREAM_DESTINATION_MAPS[default_destination_id]['partition'])
        destination = app.job_config.get_destination(destination_id)

    if destination_id is None:
        destination_id = default_destination_id

    # Set a walltime if local is the destination and this is a dynamic walltime tool
    if destination_id == LOCAL_DESTINATION and tool_id in RUNTIMES:
        destination_id = LOCAL_WALLTIME_DESTINATION
        #walltime = datetime.timedelta(seconds=(RUNTIMES[tool_id]['runtime'] + (RUNTIMES[tool_id]['stddev'] * RUNTIMES[tool_id]['devs'])) * 60)
        walltime = '36:00:00'
        destination = app.job_config.get_destination(LOCAL_WALLTIME_DESTINATION)
        destination.params['nativeSpecification'] += ' --time=%s' % str(walltime).split('.')[0]

    log.debug("(%s) Destination/walltime dynamic plugin returning '%s' destination", job.id, destination_id)
    if destination is not None and 'nativeSpecification' in destination.params:
        log.debug("     nativeSpecification is: %s", destination.params['nativeSpecification'])
    return destination or destination_id

def __rule(app, tool, job, user_email, resource_params, resource):
    destination = None
    destination_id = None
    default_destination_id = RESOURCES[resource][0]
    explicit_destination = False
    tool_id = tool.id

    if '/' in tool.id:
        # extract short tool id from tool shed id
        tool_id = tool.id.split('/')[-2]

    if user_email is None:
        raise JobMappingException('Please log in to use this tool.')

    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)

    # Explicitly set the destination if the user has chosen to do so with the resource selector
    if resource_params:
        resource_key = None
        for resource_key in resource_params.keys():
            if resource_key == resource:
                destination_id = resource_params[resource_key]
                if destination_id in RESOURCES[resource_key]:
                    explicit_destination = True
                    break
                elif destination_id == TEAM_DESTINATION:
                    explicit_destination = True
                    break
                else:
                    log.warning('(%s) Destination/walltime dynamic plugin got an invalid destination: %s', job.id, destination_id)
                    raise JobMappingException(FAILURE_MESSAGE)
        else:
            log.warning('(%s) Destination/walltime dynamic plugin did not receive a valid resource key, resource params were: %s', job.id, resource_params)
            raise JobMappingException(FAILURE_MESSAGE)
    else:
        # if __job_resource is not passed or __job_resource_select is not set to a "yes-like" value, resource_params is an empty dict
        if user_email.lower() in NORM_RESERVED_USERS:
            log.info("(%s) Destination/walltime dynamic plugin returning default reserved destination for '%s'", job.id, user_email)
            #return RESERVED_DESTINATION
            destination_id = RESERVED_DESTINATION

    if destination_id == TEAM_DESTINATION:
        if user_email in TEAM_USERS:
            destination_id = TEAM_DESTINATION
            destination = app.job_config.get_destination(TEAM_DESTINATION)
            destination.params['nativeSpecification'] += ' --ntasks=%s' % resource_params['team_cpus']
        else:
            log.warning("(%s) Unauthorized user '%s' selected team development destination", job.id, user_email)
            destination_id = LOCAL_DESTINATION
            explicit_destination = False

    # Only allow stampede if a cached reference is selected
    if destination_id in STAMPEDE_DESTINATIONS and tool_id in PUNT_TOOLS:
        for p in GENOME_SOURCE_PARAMS:
            subpd = param_dict.copy()
            # walk the param dict
            try:
                for i in p.split('.'):
                    subpd = subpd[i]
                assert subpd in GENOME_SOURCE_VALUES
                log.info('(%s) Destination/walltime dynamic plugin detected indexed reference selected, job will be sent to Stampede', job.id)
                break
            except:
                pass
        else:
            log.info('(%s) User requested Stampede but destination/walltime dynamic plugin did not detect selection of an indexed reference, job will be sent to local cluster instead', job.id)
            if destination_id == STAMPEDE_DEVELOPMENT_DESTINATION:
                destination_id = LOCAL_DEVELOPMENT_DESTINATION
            else:
                destination_id = default_destination_id

    # Some tools do not react well to Jetstream
    if not explicit_destination and tool_id not in JETSTREAM_TOOLS:
        log.info('(%s) Default destination requested and tool is not in Jetstream-approved list, job will be sent to local cluster', job.id)
        destination_id = default_destination_id

    # FIXME: this is getting really messy
    mem_mb = None

    if resource == 'multi_bridges_compute_resource' and tool_id == 'rna_star':
        # FIXME: special casing
        try:
            _destination_id, mem_mb = _rnastar(app, param_dict, destination_id, explicit_destination, job.id)
            if not explicit_destination:
                destination_id = _destination_id
                if destination_id == BRIDGES_DESTINATION:
                    destination = app.job_config.get_destination(destination_id)
                    destination.params['submit_native_specification'] += ' --time=48:00:00'
        except:
            log.exception('(%s) Error determining parameters for STAR job', job.id)
            raise JobMappingException(FAILURE_MESSAGE)

    # Need to explicitly pick a destination because of staging. Otherwise we
    # could just submit with --clusters=a,b,c and let slurm sort it out
    if destination_id in JETSTREAM_DESTINATIONS + (None,) and default_destination_id == LOCAL_DESTINATION:
        test_destination_id = destination_id or default_destination_id
        clusters = ','.join(JETSTREAM_DESTINATION_MAPS[test_destination_id]['clusters'])
        native_specification = app.job_config.get_destination(test_destination_id).params.get('nativeSpecification', '')
        native_specification = _set_walltime(tool_id, native_specification)
        if mem_mb:
            native_specification += ' --mem=%s' % mem_mb
        sbatch_test_cmd = ['sbatch', '--test-only', '--clusters=%s' % clusters] + native_specification.split() + [TEST_SCRIPT]
        log.debug('(%s) Testing job submission to determine suitable cluster: %s', job.id, ' '.join(sbatch_test_cmd))
        try:
            p = subprocess.Popen(sbatch_test_cmd, stderr=subprocess.PIPE)
            stderr = p.stderr.read()
            p.wait()
            assert p.returncode == 0, stderr
        except:
            log.exception('Error running sbatch test')
            raise JobMappingException(
                'An error occurred while trying to schedule this job. Please retry it and if it continues to fail, report it to an administrator using the bug icon.')

        # There is a race condition here, of course. But I don't have a better solution.
        node = stderr.split()[-1]
        for i, prefix in enumerate(JETSTREAM_DESTINATION_MAPS[test_destination_id]['cluster_prefixes']):
            if node.startswith(prefix):
                cluster = JETSTREAM_DESTINATION_MAPS[test_destination_id]['clusters'][i]
                destination_prefix = JETSTREAM_DESTINATION_MAPS[test_destination_id]['destination_prefixes'][i]
                break
        else:
            log.error("Could not determine the cluster of node '%s', clusters are: '%s'", node, clusters)
            raise JobMappingException(
                'An error occurred while trying to schedule this job. Please retry it and if it continues to fail, report it to an administrator using the bug icon.')

        destination_id = '%s_%s' % (destination_prefix, JETSTREAM_DESTINATION_MAPS[default_destination_id]['partition'])
        destination = app.job_config.get_destination(destination_id)
        # FIXME: aaaaah i just need this to work for now
        if destination_id.startswith('jetstream'):
            destination.params['submit_native_specification'] = _set_walltime(tool_id, destination.params.get('submit_native_specification', ''))
        else:
            destination.params['nativeSpecification'] = _set_walltime(tool_id, destination.params.get('nativeSpecification', ''))

    if destination_id is None:
        destination_id = default_destination_id

    if mem_mb:
        if destination is None:
            destination = app.job_config.get_destination(destination_id)
        # FIXME: and here wow such mess
        if destination_id in (LOCAL_DESTINATION, LOCAL_DEVELOPMENT_DESTINATION, RESERVED_DESTINATION):
            destination.params['nativeSpecification'] += ' --mem=%s' % mem_mb
        elif destination_id.startswith('jetstream'):
            pass  # don't set --mem, you get the whole node anyway
        elif destination_id in BRIDGES_DESTINATIONS:
            destination.params['submit_native_specification'] += ' --mem=%s' % mem_mb

    # Set a walltime if local is the destination and this is a dynamic walltime tool
    #if destination_id == LOCAL_DESTINATION and tool_id in RUNTIMES:
    #    destination_id = LOCAL_WALLTIME_DESTINATION
    #    #walltime = datetime.timedelta(seconds=(RUNTIMES[tool_id]['runtime'] + (RUNTIMES[tool_id]['stddev'] * RUNTIMES[tool_id]['devs'])) * 60)
    #    walltime = '36:00:00'
    #    destination = app.job_config.get_destination( LOCAL_WALLTIME_DESTINATION )
    #    destination.params['nativeSpecification'] += ' --time=%s' % str(walltime).split('.')[0]

    # Allow for overriding the walltime
    if not destination:
        destination = app.job_config.get_destination(destination_id)
        destination.params['nativeSpecification'] = _set_walltime(tool_id, destination.params.get('nativeSpecification', ''))

    log.debug("(%s) Destination/walltime dynamic plugin returning '%s' destination", job.id, destination_id)
    if destination is not None and 'nativeSpecification' in destination.params:
        log.debug("(%s) nativeSpecification is: %s", job.id, destination.params['nativeSpecification'])
    return destination or destination_id

def __rule(app, tool, job, user_email, resource_params, resource):
    destination = None
    destination_id = None
    default_destination_id = RESOURCES[resource][0]
    is_explicit_destination = False
    tool_id = tool.id

    if '/' in tool.id:
        # extract short tool id from tool shed id
        tool_id = tool.id.split('/')[-2]

    if user_email is None:
        raise JobMappingException('Please log in to use this tool.')

    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)

    # Explicitly set the destination if the user has chosen to do so with the resource selector
    if resource_params:
        resource_key = None
        for resource_key in resource_params.keys():
            if resource_key == resource:
                destination_id = resource_params[resource_key]
                if destination_id in RESOURCES[resource_key]:
                    is_explicit_destination = True
                    break
                elif destination_id == TEAM_DESTINATION:
                    is_explicit_destination = True
                    break
                else:
                    log.warning('(%s) Destination/walltime dynamic plugin got an invalid destination: %s', job.id, destination_id)
                    raise JobMappingException(FAILURE_MESSAGE)
        else:
            log.warning('(%s) Destination/walltime dynamic plugin did not receive a valid resource key, resource params were: %s', job.id, resource_params)
            raise JobMappingException(FAILURE_MESSAGE)
    else:
        # if __job_resource is not passed or __job_resource_select is not set to a "yes-like" value, resource_params is an empty dict
        if user_email.lower() in NORM_RESERVED_USERS:
            log.info("(%s) Destination/walltime dynamic plugin returning default reserved destination for '%s'", job.id, user_email)
            destination_id = RESERVED_DESTINATION
            is_explicit_destination = True

    if destination_id == TEAM_DESTINATION:
        if user_email in TEAM_USERS:
            destination_id = TEAM_DESTINATION
            destination = app.job_config.get_destination(TEAM_DESTINATION)
            destination.params['nativeSpecification'] += ' --ntasks=%s' % resource_params['team_cpus']
        else:
            log.warning("(%s) Unauthorized user '%s' selected team development destination", job.id, user_email)
            destination_id = LOCAL_DESTINATION
            is_explicit_destination = False

    #if not is_explicit_destination and user_email in ('*****@*****.**', '*****@*****.**'):
    #    log.info('(%s) Sending job for %s to Jetstream @ IU reserved partition', job.id, user_email)
    #    is_explicit_destination = True
    #    destination_id = 'jetstream_iu_reserved'

    # Some tools do not react well to Jetstream
    if not is_explicit_destination and tool_id not in JETSTREAM_TOOLS:
        log.info('(%s) Default destination requested and tool is not in Jetstream-approved list, job will be sent to local cluster', job.id)
        destination_id = default_destination_id

    # FIXME: this is getting really messy
    mem_mb = None

    #if resource == 'multi_bridges_compute_resource' and tool_id == 'rna_star':
    #    try:
    #        destination_id, mem_mb = _rnastar(app, param_dict, destination_id, is_explicit_destination, job.id)
    #        if destination_id and destination_id == BRIDGES_DESTINATION:
    #            destination = app.job_config.get_destination(destination_id)
    #            destination.params['submit_native_specification'] += ' --time=48:00:00'
    #    except Exception:
    #        log.exception('(%s) Error determining parameters for STAR job', job.id)
    #        raise JobMappingException(FAILURE_MESSAGE)

    # Need to explicitly pick a destination because of staging. Otherwise we
    # could just submit with --clusters=a,b,c and let slurm sort it out
    if destination_id in JETSTREAM_DESTINATIONS + (None,) and default_destination_id == LOCAL_DESTINATION:
        test_destination_id = destination_id or default_destination_id
        clusters = ','.join(JETSTREAM_DESTINATION_MAPS[test_destination_id]['clusters'])
        native_specification = app.job_config.get_destination(test_destination_id).params.get('nativeSpecification', '')
        native_specification = _set_walltime(tool_id, native_specification)
        #if mem_mb:
        #    native_specification += ' --mem=%s' % mem_mb
        sbatch_test_cmd = ['sbatch', '--test-only', '--clusters=%s' % clusters] + native_specification.split() + [TEST_SCRIPT]
        log.debug('(%s) Testing job submission to determine suitable cluster: %s', job.id, ' '.join(sbatch_test_cmd))
        try:
            p = subprocess.Popen(sbatch_test_cmd, stderr=subprocess.PIPE)
            stderr = p.stderr.read().decode(errors='replace')
            p.wait()
            assert p.returncode == 0, stderr
        except Exception:
            log.exception('Error running sbatch test')
            raise JobMappingException(SCHEDULE_FAILURE_MESSAGE)

        # There is a race condition here, of course. But I don't have a better solution.
        # TODO: make functional
        node = None
        for line in stderr.splitlines():
            match = re.search(SLURM_TEST_PATTERN, line)
            if match:
                node = match.group(1)
                break
        else:
            log.error("Unable to parse test job output: %s", stderr)
            raise JobMappingException(SCHEDULE_FAILURE_MESSAGE)

        for i, prefix in enumerate(JETSTREAM_DESTINATION_MAPS[test_destination_id]['cluster_prefixes']):
            if node.startswith(prefix):
                # cluster = JETSTREAM_DESTINATION_MAPS[test_destination_id]['clusters'][i]
                destination_prefix = JETSTREAM_DESTINATION_MAPS[test_destination_id]['destination_prefixes'][i]
                break
        else:
            log.error("Could not determine the cluster of node '%s', clusters are: '%s'", node, clusters)
            raise JobMappingException(SCHEDULE_FAILURE_MESSAGE)

        destination_id = '%s_%s' % (destination_prefix, JETSTREAM_DESTINATION_MAPS[default_destination_id]['partition'])
        destination = app.job_config.get_destination(destination_id)
        # FIXME: aaaaah i just need this to work for now
        if destination_id.startswith('jetstream'):
            destination.params['submit_native_specification'] = _set_walltime(tool_id, destination.params.get('submit_native_specification', ''))
        else:
            destination.params['nativeSpecification'] = _set_walltime(tool_id, destination.params.get('nativeSpecification', ''))

    if destination_id is None:
        destination_id = default_destination_id

    if mem_mb:
        if destination is None:
            destination = app.job_config.get_destination(destination_id)
        # FIXME: and here wow such mess
        if destination_id in (LOCAL_DESTINATION, LOCAL_DEVELOPMENT_DESTINATION, RESERVED_DESTINATION):
            destination.params['nativeSpecification'] += ' --mem=%s' % mem_mb
        elif destination_id.startswith('jetstream'):
            pass  # don't set --mem, you get the whole node anyway
        elif destination_id in BRIDGES_DESTINATIONS:
            destination.params['submit_native_specification'] += ' --mem=%s' % mem_mb

    # Set a walltime if local is the destination and this is a dynamic walltime tool
    #if destination_id == LOCAL_DESTINATION and tool_id in RUNTIMES:
    #    destination_id = LOCAL_WALLTIME_DESTINATION
    #    #walltime = datetime.timedelta(seconds=(RUNTIMES[tool_id]['runtime'] + (RUNTIMES[tool_id]['stddev'] * RUNTIMES[tool_id]['devs'])) * 60)
    #    walltime = '36:00:00'
    #    destination = app.job_config.get_destination( LOCAL_WALLTIME_DESTINATION )
    #    destination.params['nativeSpecification'] += ' --time=%s' % str(walltime).split('.')[0]

    # Allow for overriding the walltime
    if not destination:
        destination = app.job_config.get_destination(destination_id)
        destination.params['nativeSpecification'] = _set_walltime(tool_id, destination.params.get('nativeSpecification', ''))

    log.debug("(%s) Destination/walltime dynamic plugin returning '%s' destination", job.id, destination_id)
    if destination is not None:
        if 'nativeSpecification' in destination.params and destination.params['nativeSpecification']:
            log.debug("(%s) nativeSpecification is: %s", job.id, destination.params['nativeSpecification'])
        elif 'submit_native_specification' in destination.params and destination.params['submit_native_specification']:
            log.debug("(%s) submit_native_specification is: %s", job.id, destination.params['submit_native_specification'])
    return destination or destination_id

def admin_only(app, user_email):
    # Only allow the tool to be executed if the user is an admin
    admin_users = app.config.get("admin_users", "").split(",")
    if user_email not in admin_users:
        raise JobMappingException("Unauthorized.")
    return JobDestination(runner="slurm")

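# Note (assumption): Galaxy's `admin_users` setting is a comma-separated string and may
# contain whitespace around the commas; the variant below normalizes it before the
# membership check. It is an illustrative sketch, not part of the original rules.
def admin_only_normalized(app, user_email):
    # hypothetical stricter variant of admin_only
    admin_users = [u.strip() for u in app.config.get("admin_users", "").split(",") if u.strip()]
    if user_email not in admin_users:
        raise JobMappingException("Unauthorized.")
    return JobDestination(runner="slurm")
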
def dynamic_bridges_select(app, tool, job, user_email):
    destination = None
    tool_id = tool.id
    if '/' in tool.id:
        # extract short tool id from tool shed id
        tool_id = tool.id.split('/')[-2]
    if user_email is None:
        raise JobMappingException('Please log in to use this tool.')

    inp_data = dict([(da.name, da.dataset) for da in job.input_datasets])
    inp_data.update([(da.name, da.dataset) for da in job.input_library_datasets])

    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)

    if '__job_resource' in param_dict and param_dict['__job_resource']['__job_resource__select'] == 'yes':
        resource_key = None
        for resource_key in param_dict['__job_resource'].keys():
            if resource_key in RESOURCE_KEYS:
                break
        else:
            log.warning(
                '(%s) Bridges dynamic plugin did not find a valid resource key, keys were: %s',
                job.id, param_dict['__job_resource'].keys())
            raise JobMappingException(FAILURE_MESSAGE)

        destination_id = param_dict['__job_resource'][resource_key]
        if destination_id not in VALID_DESTINATIONS:
            log.warning('(%s) Bridges dynamic plugin got an invalid destination: %s', job.id, destination_id)
            raise JobMappingException(FAILURE_MESSAGE)

        if destination_id == BRIDGES_NORMAL_DESTINATION:
            destination = app.job_config.get_destination(BRIDGES_NORMAL_DESTINATION)
        elif destination_id == BRIDGES_DEVELOPMENT_DESTINATION:
            destination = app.job_config.get_destination(BRIDGES_DEVELOPMENT_DESTINATION)
    else:
        # default to 15 cpus in the regular queue
        destination = app.job_config.get_destination(BRIDGES_NORMAL_DESTINATION)

    if destination is None:
        log.error('(%s) bridges_select dynamic plugin did not set a destination', job.id)
        raise JobMappingException(FAILURE_MESSAGE)

    if destination.id == BRIDGES_NORMAL_DESTINATION:
        mem = 240 * 1024  # 5 * 48 GB
        walltime = '24:00:00'

        if tool_id in ('trinity_psc', 'trinity'):
            infile = inp_data.get('left_input', None) or inp_data.get('input', None)
            if infile is None:
                log.error('Trinity submitted without inputs, failing')
                raise JobMappingException(FAILURE_MESSAGE)
            insize = infile.get_size()
            if param_dict.get('additional_params', {}).get('normalize_reads', False):
                # normalizing: less runtime, less memory
                if insize < (10 * 1024**3):
                    mem = 240 * 1024  # 5 * 48 GB
                    walltime = '72:00:00'
                elif insize < (100 * 1024**3):
                    mem = 480 * 1024  # 10 * 48 GB
                    walltime = '96:00:00'
                else:
                    mem = 720 * 1024  # 15 * 48 GB
                    walltime = '96:00:00'
            else:
                # increased runtime, increased memory
                if insize < (10 * 1024**3):
                    mem = 480 * 1024  # 10 * 48 GB
                    walltime = '96:00:00'
                elif insize < (100 * 1024**3):
                    mem = 720 * 1024  # 15 * 48 GB
                    walltime = '96:00:00'
                else:
                    mem = 960 * 1024  # 20 * 48 GB
                    walltime = '96:00:00'
        elif tool_id == 'unicycler':
            # SPAdes uses at most 250GB
            mem = 288 * 1024
            walltime = '48:00:00'
            stack_ulimit = 24576
            destination.env.append({
                'name': None,
                'file': None,
                'execute': 'ulimit -s %d' % stack_ulimit,
                'value': None,
                'raw': False,
            })
            log.debug('(%s) will execute `ulimit -s %d`', job.id, stack_ulimit)
        else:
            # nothing to go off of yet so we'll just guess
            mem = 480 * 1024
            walltime = '48:00:00'

        destination.env.append({
            'name': 'GALAXY_MEMORY_MB',
            'file': None,
            'execute': None,
            'value': str(mem),
            'raw': False,
        })
        log.debug("(%s) set $GALAXY_MEMORY_MB to %s", job.id, mem)

        destination.params['submit_native_specification'] += ' --time=%s' % walltime
        destination.params['submit_native_specification'] += ' --mem=%s' % mem

    log.debug("(%s) bridges_select dynamic plugin returning '%s' destination", job.id, destination.id)
    log.debug("     submit_native_specification is: %s", destination.params['submit_native_specification'])
    return destination