def url_to_destination(self, url):
    """Convert a legacy URL to a job destination"""
    if not url:
        return
    native_spec = url.split('/')[2]
    if native_spec:
        params = dict(nativeSpecification=native_spec)
        log.debug("Converted URL '%s' to destination runner=drmaa, params=%s" % (url, params))
        return JobDestination(runner='drmaa', params=params)
    else:
        log.debug("Converted URL '%s' to destination runner=drmaa" % url)
        return JobDestination(runner='drmaa')
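# For illustration only: a made-up legacy DRMAA URL and the parameters the
# rule above would derive from it (the URL value is hypothetical).
#
#   url = 'drmaa://-l h_vmem=8G -pe smp 4/'
#   url.split('/')[2]  ->  '-l h_vmem=8G -pe smp 4'
#   resulting destination:
#   JobDestination(runner='drmaa', params={'nativeSpecification': '-l h_vmem=8G -pe smp 4'})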
def __recover_job_wrapper(self, job):
    # Already dispatched and running
    job_wrapper = self.job_wrapper(job)
    # Use the persisted destination as its params may differ from
    # what's in the job_conf xml
    job_destination = JobDestination(id=job.destination_id, runner=job.job_runner_name,
                                     params=job.destination_params)
    # resubmits are not persisted (it's a good thing) so they
    # should be added back to the in-memory destination on startup
    try:
        config_job_destination = self.app.job_config.get_destination(job.destination_id)
        job_destination.resubmit = config_job_destination.resubmit
    except KeyError:
        log.debug('(%s) Recovered destination id (%s) does not exist in job config (but this may be normal in the case of a dynamically generated destination)',
                  job.id, job.destination_id)
    job_wrapper.job_runner_mapper.cached_job_destination = job_destination
    return job_wrapper
def gateway(tool_id, user, memory_scale=1.0):
    # And run it.
    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        email = user.email
    else:
        user_roles = []
        email = ''

    try:
        env, params, runner, spec = _gateway2(tool_id, user_roles, email, memory_scale=memory_scale)
    except requests.exceptions.RequestException:
        # We really failed, so fall back to old algo.
        env, params, runner, spec = _gateway(tool_id, user_roles, email, memory_scale=memory_scale)

    name = name_it(spec)
    return JobDestination(
        id=name,
        runner=runner,
        params=params,
        env=env,
        resubmit=[{
            'condition': 'any_failure',
            'destination': 'resubmit_gateway',
        }]
    )
def gateway(tool_id, user, memory_scale=1.0, next_dest=None):
    # And run it.
    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        email = ''
        user_id = -1

    try:
        env, params, runner, spec = _gateway(tool_id, user_roles, user_id, email, memory_scale=memory_scale)
    except Exception as e:
        return JobMappingException(str(e))

    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]

    name = name_it(spec)
    return JobDestination(
        id=name,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )
def default_dynamic_job_wrapper(job):
    # Allocate the number of cpus based on the number available (by instance size)
    cpus_avail = multiprocessing.cpu_count()
    cpunum = cpus_avail // 2  # integer division so the task count is a whole number
    cpunum = _adjustcpus(cpunum)
    cpu_str = "--ntasks=" + str(cpunum)
    return JobDestination(runner="slurm", params={"nativeSpecification": cpu_str})
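# _adjustcpus() is defined elsewhere in the same module and is not shown here.
# A minimal, purely illustrative sketch of what such a helper might do (the
# real implementation may apply different limits):
def _adjustcpus_example(cpunum):
    # Hypothetical clamp: request at least 1 task and no more than 16.
    return max(1, min(int(cpunum), 16))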
def dexseq_memory_mapper(job, tool):
    # Assign admin users' jobs to special admin_project.
    # Allocate extra time
    inp_data = dict([(da.name, da.dataset) for da in job.input_datasets])
    inp_data.update([(da.name, da.dataset) for da in job.input_library_datasets])
    gtf_file = inp_data["gtf"].file_name
    vmem = 5200
    cores = 6
    params = {}
    gtf_file_size = os.path.getsize(gtf_file) / (1024 * 1024.0)
    if gtf_file_size > 150:
        vmem = 30000
        cores = 6

    # TODO(hxr): fix?
    # params["nativeSpecification"] = """
    #     -q galaxy1.q,all.q -l galaxy1_slots=1 -l h_vmem=%sM -pe "pe*" %s -v
    #     _JAVA_OPTIONS -v TEMP -v TMPDIR -v PATH -v PYTHONPATH -v
    #     LD_LIBRARY_PATH -v XAPPLRESDIR -v GDFONTPATH -v GNUPLOT_DEFAULT_GDFONT
    #     -v MPLCONFIGDIR -soft -l galaxy1_dedicated=1
    # """ % (vmem, cores)
    params['request_memory'] = vmem / 1024
    params['request_cpus'] = cores
    params['requirements'] = '(GalaxyGroup == "compute")'
    params['priority'] = 128
    env = {
        '_JAVA_OPTIONS': "-Xmx4G -Xms1G",
    }

    return JobDestination(id="dexseq_dynamic_memory_mapping", runner="condor", params=params, env=env)
def setSGEpriority(app, user, user_email):
    admin_users = app.config.get("admin_users", "").split(",")
    try:
        roles = [role.name for role in user.all_roles()]
    except Exception:
        roles = []
    params = {}
    # User priorities from TOP to BOTTOM.
    # ADMINS
    if user_email in admin_users:
        params["nativeSpecification"] = "-p 1000"
    # TEACHERS
    elif "teacher" in roles:
        params["nativeSpecification"] = "-p 500"
    # PUPILS
    elif "pupil" in roles:
        params["nativeSpecification"] = "-p 10"
    # REGISTERED USERS
    elif user_email:
        params["nativeSpecification"] = "-p -100"
    # UNREGISTERED USERS
    else:
        params["nativeSpecification"] = "-p -600"
    return JobDestination(runner="drmaa", params=params)
def get_destination(app, referrer=None, cloudlaunch_api_endpoint=None,
                    cloudlaunch_api_token=None, pulsar_runner_id="pulsar",
                    pulsar_file_action_config=None, fallback_destination_id=None):
    """
    Returns an available Pulsar JobDestination by querying cloudlaunch.
    If no Pulsar server is available, returns a fallback destination
    (if specified by the user) or raises a JobNotReadyException, so that
    the job will be rescheduled.
    """
    url, token = get_next_server(cloudlaunch_api_endpoint, cloudlaunch_api_token)
    if url:
        resubmit_dest = None  # guard against referrer not being provided
        if referrer:
            resubmit_dest = referrer.get('resubmit')
        return JobDestination(
            runner=pulsar_runner_id,
            params={"url": url,
                    "private_token": token,
                    "file_action_config": pulsar_file_action_config,
                    },
            resubmit=resubmit_dest)
    elif fallback_destination_id:
        return fallback_destination_id
    else:
        raise JobNotReadyException  # This will attempt to reschedule the job
def __setup_resources(resource_params, settings):
    resource_params['docker_enabled'] = True
    __check_resource_params(resource_params, resource_type='cpu')
    __check_resource_params(resource_params, resource_type='memory')
    __merge_into_res_params(resource_params, settings, resource_type='cpu')
    __merge_into_res_params(resource_params, settings, resource_type='memory')
    return JobDestination(runner="k8s", params=resource_params)
def k8s_container_mapper(tool, referrer, k8s_runner_id="k8s"):
    params = dict(referrer.params)
    params['docker_enabled'] = True
    # For backwards compatibility: unnest parameters under "container"
    cont_map = params.pop("container", {})
    params.update(cont_map)
    # 1. First, apply the default resource set (if defined) as job params.
    #    These will be overridden later by individual tool mappings.
    default_resource_set_name = _get_default_resource_set_name()
    if default_resource_set_name:
        params.update(_map_resource_set(default_resource_set_name))
    # 2. Next, apply resource mappings for individual tools, overwriting the
    #    defaults.
    if not _apply_rule_mappings(tool, params):
        # 3. If no explicit rule mapping was defined, and it's a tool that
        #    requires galaxy_lib, force the default container. Otherwise,
        #    Galaxy's default container resolution will apply.
        if tool.id in GALAXY_LIB_TOOLS_UNVERSIONED or (
                "CONVERTER_" in tool.id and "CONVERTER_" == tool.id[:10]):
            default_container = params.get('docker_default_container_id')
            if default_container:
                params['docker_container_id_override'] = default_container
    log.debug("[k8s_container_mapper] Dispatching to %s with params %s" % (k8s_runner_id, params))
    return JobDestination(runner=k8s_runner_id, params=params)
def dynamic_chain_1():
    # Check whether chaining dynamic job destinations works
    return JobDestination(runner="dynamic", params={
        'type': 'python',
        'function': 'dynamic_chain_2',
        'test_param': 'my_test_param'
    })
def dyndest_chain_2(tmp_dir_prefix):
    # Chain to yet a third
    return JobDestination(runner="dynamic", params={
        'type': 'python',
        'function': 'dyndest_chain_3',
        'rules_module': 'integration.chained_dyndest_rules.module3',
        'tmp_dir_prefix_two': '%sand2' % tmp_dir_prefix
    })
def gateway_for_keras_train_eval(app, job, tool, user, next_dest=None):
    """
    Type of compute resource (CPU or GPU) for keras_train_eval tool
    depends on user's input from its wrapper. Default resource is CPU.
    """
    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)
    tool_id = tool.id

    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        user_preferences = user.extra_preferences
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        user_preferences = []
        email = ''
        user_id = -1

    # get default job destination parameters
    try:
        env, params, runner, spec, tags = _gateway(tool_id, user_preferences, user_roles, user_id, email)
    except Exception as e:
        return JobMappingException(str(e))

    # set up to resubmit job in case of failure
    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]
    name = name_it(spec)

    # assign dynamic runner based on user's input from tool wrapper
    if '__job_resource' in param_dict:
        if 'gpu' in param_dict['__job_resource']:
            if param_dict['__job_resource']['gpu'] == '1':
                params['requirements'] = 'GalaxyGroup == "compute_gpu"'
                params['request_gpus'] = 1
                # env.append({'name': 'GPU_AVAILABLE', 'value': '1'})

    # create dynamic destination rule
    return JobDestination(
        id=name,
        tags=tags,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )
def url_to_destination(self, url):
    params = {}
    shell_params, job_params = url.split('/')[2:4]
    # split 'foo=bar&baz=quux' into { 'foo' : 'bar', 'baz' : 'quux' }
    shell_params = dict([('shell_' + k, v) for k, v in [kv.split('=', 1) for kv in shell_params.split('&')]])
    job_params = dict([('job_' + k, v) for k, v in [kv.split('=', 1) for kv in job_params.split('&')]])
    params.update(shell_params)
    params.update(job_params)
    log.debug("Converted URL '%s' to destination runner=cli, params=%s" % (url, params))
    # Create a dynamic JobDestination
    return JobDestination(runner='cli', params=params)
def url_to_destination(self, url):
    params = {}
    shell_params, job_params = url.split('/')[2:4]
    # split 'foo=bar&baz=quux' into { 'foo' : 'bar', 'baz' : 'quux' }
    shell_params = {'shell_' + k: v for k, v in [kv.split('=', 1) for kv in shell_params.split('&')]}
    job_params = {'job_' + k: v for k, v in [kv.split('=', 1) for kv in job_params.split('&')]}
    params.update(shell_params)
    params.update(job_params)
    log.debug(f"Converted URL '{url}' to destination runner=cli, params={params}")
    # Create a dynamic JobDestination
    return JobDestination(runner='cli', params=params)
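# For illustration only: a made-up legacy cli URL and the destination params
# the two rules above would produce from it.
#
#   url = 'cli://plugin=SecureShell&hostname=example.org/plugin=Torque'
#   shell_params, job_params = url.split('/')[2:4]
#   # shell_params -> 'plugin=SecureShell&hostname=example.org'
#   # job_params   -> 'plugin=Torque'
#   # params -> {'shell_plugin': 'SecureShell',
#   #            'shell_hostname': 'example.org',
#   #            'job_plugin': 'Torque'}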
def gateway_for_hifism(app, job, tool, user, memory_scale=1.0, next_dest=None):
    """
    The memory requirement of Hifiasm depends on a wrapper's input
    """
    param_dict = dict([(p.name, p.value) for p in job.parameters])
    param_dict = tool.params_from_strings(param_dict, app)
    tool_id = tool.id

    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        user_preferences = user.extra_preferences
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        user_preferences = []
        email = ''
        user_id = -1

    try:
        env, params, runner, spec, tags = _gateway(tool_id, user_preferences, user_roles, user_id, email,
                                                   ft=FAST_TURNAROUND, special_tools=SPECIAL_TOOLS,
                                                   memory_scale=memory_scale)
    except Exception as e:
        return JobMappingException(str(e))

    limits = _get_limits(runner)
    request_memory = str(min(_compute_memory_for_hifiasm(param_dict), limits.get('mem'))) + 'G'
    params['request_memory'] = request_memory

    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]
    name = name_it(spec)
    return JobDestination(
        id=name,
        tags=tags,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )
def gateway(tool_id, user, memory_scale=1.0, next_dest=None):
    # And run it.
    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        user_preferences = user.extra_preferences
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        user_preferences = []
        email = ''
        user_id = -1

    if get_tool_id(tool_id).startswith('interactive_tool_') and user_id == -1:
        raise JobMappingException(
            "This tool is restricted to registered users, "
            "please contact a site administrator")

    if get_tool_id(tool_id).startswith('interactive_tool_ml') and 'interactive-tool-ml-jupyter-notebook' not in user_roles:
        raise JobMappingException(
            "This tool is restricted to authorized users, "
            "please contact a site administrator")

    try:
        env, params, runner, spec, tags = _gateway(tool_id, user_preferences, user_roles, user_id, email,
                                                   memory_scale=memory_scale)
    except Exception as e:
        return JobMappingException(str(e))

    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]
    name = name_it(spec)
    return JobDestination(
        id=name,
        tags=tags,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )
def url_to_destination(self, url):
    """This is used by the runner mapper (a.k.a. dynamic runner) and
    recovery methods to have runners convert URLs to destinations.

    New-style runner plugin IDs must match the URL's scheme for this to work.
    """
    runner_name = url.split(':', 1)[0]
    try:
        return self.job_runners[runner_name].url_to_destination(url)
    except Exception as e:
        log.exception("Unable to convert legacy job runner URL '%s' to job destination, destination will be the '%s' runner with no params: %s" % (url, runner_name, e))
        return JobDestination(runner=runner_name)
def dynamic_resubmit_once(resource_params):
    """Build destination that always fails first time and always re-routes to passing destination."""
    job_destination = JobDestination()
    # Always fail on the first attempt.
    job_destination['runner'] = "failure_runner"
    # Resubmit to a valid destination.
    job_destination['resubmit'] = [dict(
        condition="any_failure",
        destination="local",
    )]
    return job_destination
def mapping_dynamic_job_wrapper(job):
    # allocate extra cpus for large files.
    cpus_avail = multiprocessing.cpu_count()
    inp_data = dict([(da.name, da.dataset) for da in job.input_datasets])
    inp_data.update([(da.name, da.dataset) for da in job.input_library_datasets])
    query_file = inp_data["fastq_input1"].file_name
    query_size = os.path.getsize(query_file)
    if query_size > 100 * 1024 * 1024:
        cpunum = cpus_avail
    else:
        cpunum = cpus_avail // 2  # integer division so the task count is a whole number
    cpunum = _adjustcpus(cpunum)
    cpu_str = "--ntasks=" + str(cpunum)
    return JobDestination(runner="slurm", params={"nativeSpecification": cpu_str})
def test_dynamic_mapping_externally_set_job_destination():
    mapper = __mapper(__dynamic_destination(dict(function="upload")))
    # Initially, the mapper should not have a cached destination
    assert not hasattr(mapper, 'cached_job_destination')
    # Overwrite with an externally set job destination
    manually_set_destination = JobDestination(runner="dynamic")
    mapper.cached_job_destination = manually_set_destination
    destination = mapper.get_job_destination({})
    assert destination == manually_set_destination
    assert mapper.cached_job_destination == manually_set_destination
    # Force overwrite with mapper determined destination
    mapper.cache_job_destination(None)
    assert mapper.cached_job_destination is not None
    assert mapper.cached_job_destination != manually_set_destination
    assert mapper.job_config.rule_response == "local_runner"
def k8s_container_mapper(tool, referrer, k8s_runner_id="k8s"):
    params = dict(referrer.params)
    params['docker_enabled'] = True
    # Apply default resource limits
    default_resource_set_name = _get_default_resource_set_name()
    if default_resource_set_name:
        params.update(_map_resource_set(default_resource_set_name))
    if not _apply_rule_mappings(tool, params):
        if tool.id in GALAXY_LIB_TOOLS_UNVERSIONED:
            default_container = params.get('docker_default_container_id')
            if default_container:
                params['docker_container_id_override'] = default_container
    log.debug("[k8s_container_mapper] Dispatching to %s with params %s" % (k8s_runner_id, params))
    return JobDestination(runner=k8s_runner_id, params=params)
def gateway(tool_id, user, memory_scale=1.0, next_dest=None):
    if user:
        user_roles = [role.name for role in user.all_roles() if not role.deleted]
        user_preferences = user.extra_preferences
        email = user.email
        user_id = user.id
    else:
        user_roles = []
        user_preferences = []
        email = ''
        user_id = -1

    try:
        env, params, runner, spec, tags = _gateway(tool_id, user_preferences, user_roles, user_id, email,
                                                   ft=FAST_TURNAROUND, special_tools=SPECIAL_TOOLS,
                                                   memory_scale=memory_scale)
    except Exception as e:
        return JobMappingException(str(e))

    resubmit = []
    if next_dest:
        resubmit = [{
            'condition': 'any_failure and attempt <= 3',
            'destination': next_dest
        }]
    name = name_it(spec)
    return JobDestination(
        id=name,
        tags=tags,
        runner=runner,
        params=params,
        env=env,
        resubmit=resubmit,
    )
def wig_to_bigwig(job, tool):
    # wig_to_bigwig needs a lot of memory if the input file is big
    inp_data = dict([(da.name, da.dataset) for da in job.input_datasets])
    inp_data.update([(da.name, da.dataset) for da in job.input_library_datasets])
    wig_file = inp_data["input1"].file_name
    wig_file_size = os.path.getsize(wig_file) / (1024 * 1024.0)

    # according to http://genome.ucsc.edu/goldenpath/help/bigWig.html
    # wig2bigwig uses a lot of memory; somewhere on the order of 1.5 times more
    # memory than the uncompressed wiggle input file
    required_memory = min(max(wig_file_size * 3.0, 16 * 1024), 250 * 1024)  # our biggest memory node has 256GB memory

    params = {}
    # params["nativeSpecification"] = """
    #     -q galaxy1.q,all.q -p -128 -l galaxy1_slots=1 -l h_vmem=%sM -v _JAVA_OPTIONS
    #     -v TEMP -v TMPDIR -v PATH -v PYTHONPATH -v LD_LIBRARY_PATH -v XAPPLRESDIR
    #     -v GDFONTPATH -v GNUPLOT_DEFAULT_GDFONT -v MPLCONFIGDIR -soft -l galaxy1_dedicated=1
    # """ % (required_memory)
    params['request_memory'] = required_memory / 1024
    params['requirements'] = '(GalaxyGroup == "compute")'

    return JobDestination(id="wig_to_bigwig_job_destination", runner="condor", params=params)
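# Worked example of the sizing formula above (the input size is hypothetical):
# a 10 GiB uncompressed wiggle file gives wig_file_size ~= 10240 (MiB), so
# required_memory = min(max(10240 * 3.0, 16 * 1024), 250 * 1024) = 30720 MiB
# and request_memory = 30720 / 1024 = 30 (GiB).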
def url_to_destination(self, url):
    """Convert a legacy URL to a job destination"""
    if not url:
        return

    # Determine the PBS server
    url_split = url.split("/")
    server = url_split[2]
    if server == '':
        server = self.default_pbs_server
    if server is None:
        raise Exception("Could not find TORQUE server")

    # Determine the queue, set the PBS destination (not the same thing as a Galaxy job destination)
    pbs_destination = f'@{server}'
    pbs_queue = url_split[3] or None
    if pbs_queue is not None:
        pbs_destination = f'{pbs_queue}{pbs_destination}'
    params = dict(destination=pbs_destination)

    # Determine the args (long-format args were never supported in URLs so they are not supported here)
    try:
        opts = url.split('/')[4].strip().lstrip('-').split(' -')
        assert opts != ['']
        # stripping the - comes later (in parse_destination_params)
        for i, opt in enumerate(opts):
            opts[i] = f"-{opt}"
    except Exception:
        opts = []
    for opt in opts:
        param, value = opt.split(None, 1)
        params[param] = value

    log.debug(f"Converted URL '{url}' to destination runner=pbs, params={params}")

    # Create a dynamic JobDestination
    return JobDestination(runner='pbs', params=params)
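# For illustration only: a made-up legacy PBS URL and how the rule above
# would parse it.
#
#   url = 'pbs://pbs.example.org/long_queue/-l walltime=24:00:00 -A myproject/'
#   server    -> 'pbs.example.org'
#   pbs_queue -> 'long_queue'
#   params    -> {'destination': 'long_queue@pbs.example.org',
#                 '-l': 'walltime=24:00:00',
#                 '-A': 'myproject'}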
def url_to_destination(self, url):
    """Convert a legacy URL to a job destination"""
    return JobDestination(runner="pulsar", params=url_to_destination_params(url))
def __check_jobs_at_startup(self):
    """
    Checks all jobs that are in the 'new', 'queued' or 'running' state in
    the database and requeues or cleans up as necessary.  Only run as the
    job handler starts.
    In case the activation is enforced it will filter out the jobs of inactive users.
    """
    jobs_at_startup = []
    if self.track_jobs_in_database:
        in_list = (model.Job.states.QUEUED,
                   model.Job.states.RUNNING)
    else:
        in_list = (model.Job.states.NEW,
                   model.Job.states.QUEUED,
                   model.Job.states.RUNNING)
    if self.app.config.user_activation_on:
        jobs_at_startup = self.sa_session.query(model.Job).enable_eagerloads(False) \
            .outerjoin(model.User) \
            .filter(model.Job.state.in_(in_list)
                    & (model.Job.handler == self.app.config.server_name)
                    & or_((model.Job.user_id == null()), (model.User.active == true()))).all()
    else:
        jobs_at_startup = self.sa_session.query(model.Job).enable_eagerloads(False) \
            .filter(model.Job.state.in_(in_list)
                    & (model.Job.handler == self.app.config.server_name)).all()

    for job in jobs_at_startup:
        if not self.app.toolbox.has_tool(job.tool_id, job.tool_version, exact=True):
            log.warning("(%s) Tool '%s' removed from tool config, unable to recover job" % (job.id, job.tool_id))
            self.job_wrapper(job).fail('This tool was disabled before the job completed. Please contact your Galaxy administrator.')
        elif job.job_runner_name is not None and job.job_runner_external_id is None:
            # This could happen during certain revisions of Galaxy where a runner URL was persisted before the job was dispatched to a runner.
            log.debug("(%s) Job runner assigned but no external ID recorded, adding to the job handler queue" % job.id)
            job.job_runner_name = None
            if self.track_jobs_in_database:
                job.set_state(model.Job.states.NEW)
            else:
                self.queue.put((job.id, job.tool_id))
        elif job.job_runner_name is not None and job.job_runner_external_id is not None and job.destination_id is None:
            # This is the first start after upgrading from URLs to destinations, convert the URL to a destination and persist
            job_wrapper = self.job_wrapper(job)
            job_destination = self.dispatcher.url_to_destination(job.job_runner_name)
            if job_destination.id is None:
                job_destination.id = 'legacy_url'
            job_wrapper.set_job_destination(job_destination, job.job_runner_external_id)
            self.dispatcher.recover(job, job_wrapper)
            log.info('(%s) Converted job from a URL to a destination and recovered' % (job.id))
        elif job.job_runner_name is None:
            # Never (fully) dispatched
            log.debug("(%s) No job runner assigned and job still in '%s' state, adding to the job handler queue" % (job.id, job.state))
            if self.track_jobs_in_database:
                job.set_state(model.Job.states.NEW)
            else:
                self.queue.put((job.id, job.tool_id))
        else:
            # Already dispatched and running
            job_wrapper = self.job_wrapper(job)
            # Use the persisted destination as its params may differ from
            # what's in the job_conf xml
            job_destination = JobDestination(id=job.destination_id, runner=job.job_runner_name, params=job.destination_params)
            # resubmits are not persisted (it's a good thing) so they
            # should be added back to the in-memory destination on startup
            try:
                config_job_destination = self.app.job_config.get_destination(job.destination_id)
                job_destination.resubmit = config_job_destination.resubmit
            except KeyError:
                log.warning('(%s) Recovered destination id (%s) does not exist in job config (but this may be normal in the case of a dynamically generated destination)', job.id, job.destination_id)
            job_wrapper.job_runner_mapper.cached_job_destination = job_destination
            self.dispatcher.recover(job, job_wrapper)

    if self.sa_session.dirty:
        self.sa_session.flush()
def __monitor_step(self):
    """
    Called repeatedly by `monitor` to process waiting jobs. Gets any new
    jobs (either from the database or from its own queue), then iterates
    over all new and waiting jobs to check the state of the jobs each
    depends on. If the job has dependencies that have not finished, it
    goes to the waiting queue. If the job has dependencies with errors,
    it is marked as having errors and removed from the queue. If the job
    belongs to an inactive user it is ignored.
    Otherwise, the job is dispatched.
    """
    # Pull all new jobs from the queue at once
    jobs_to_check = []
    resubmit_jobs = []
    if self.track_jobs_in_database:
        # Clear the session so we get fresh states for job and all datasets
        self.sa_session.expunge_all()
        # Fetch all new jobs
        hda_not_ready = self.sa_session.query(model.Job.id).enable_eagerloads(False) \
            .join(model.JobToInputDatasetAssociation) \
            .join(model.HistoryDatasetAssociation) \
            .join(model.Dataset) \
            .filter(and_((model.Job.state == model.Job.states.NEW),
                         or_((model.HistoryDatasetAssociation._state == model.HistoryDatasetAssociation.states.FAILED_METADATA),
                             (model.HistoryDatasetAssociation.deleted == true()),
                             (model.Dataset.state != model.Dataset.states.OK),
                             (model.Dataset.deleted == true())))).subquery()
        ldda_not_ready = self.sa_session.query(model.Job.id).enable_eagerloads(False) \
            .join(model.JobToInputLibraryDatasetAssociation) \
            .join(model.LibraryDatasetDatasetAssociation) \
            .join(model.Dataset) \
            .filter(and_((model.Job.state == model.Job.states.NEW),
                         or_((model.LibraryDatasetDatasetAssociation._state != null()),
                             (model.LibraryDatasetDatasetAssociation.deleted == true()),
                             (model.Dataset.state != model.Dataset.states.OK),
                             (model.Dataset.deleted == true())))).subquery()
        if self.app.config.user_activation_on:
            jobs_to_check = self.sa_session.query(model.Job).enable_eagerloads(False) \
                .outerjoin(model.User) \
                .filter(and_((model.Job.state == model.Job.states.NEW),
                             or_((model.Job.user_id == null()), (model.User.active == true())),
                             (model.Job.handler == self.app.config.server_name),
                             ~model.Job.table.c.id.in_(hda_not_ready),
                             ~model.Job.table.c.id.in_(ldda_not_ready))) \
                .order_by(model.Job.id).all()
        else:
            jobs_to_check = self.sa_session.query(model.Job).enable_eagerloads(False) \
                .filter(and_((model.Job.state == model.Job.states.NEW),
                             (model.Job.handler == self.app.config.server_name),
                             ~model.Job.table.c.id.in_(hda_not_ready),
                             ~model.Job.table.c.id.in_(ldda_not_ready))) \
                .order_by(model.Job.id).all()
        # Fetch all "resubmit" jobs
        resubmit_jobs = self.sa_session.query(model.Job).enable_eagerloads(False) \
            .filter(and_((model.Job.state == model.Job.states.RESUBMITTED),
                         (model.Job.handler == self.app.config.server_name))) \
            .order_by(model.Job.id).all()
    else:
        # Get job objects and append to watch queue for any which were
        # previously waiting
        for job_id in self.waiting_jobs:
            jobs_to_check.append(self.sa_session.query(model.Job).get(job_id))
        try:
            while 1:
                message = self.queue.get_nowait()
                if message is self.STOP_SIGNAL:
                    return
                # Unpack the message
                job_id, tool_id = message
                # Get the job object and append to watch queue
                jobs_to_check.append(self.sa_session.query(model.Job).get(job_id))
        except Empty:
            pass

    # Ensure that we get new job counts on each iteration
    self.__clear_job_count()

    # Check resubmit jobs first so that limits of new jobs will still be enforced
    for job in resubmit_jobs:
        log.debug('(%s) Job was resubmitted and is being dispatched immediately', job.id)
        # Reassemble resubmit job destination from persisted value
        jw = self.job_wrapper(job)
        jw.job_runner_mapper.cached_job_destination = JobDestination(id=job.destination_id,
                                                                     runner=job.job_runner_name,
                                                                     params=job.destination_params)
        self.increase_running_job_count(job.user_id, jw.job_destination.id)
        self.dispatcher.put(jw)

    # Iterate over new and waiting jobs and look for any that are
    # ready to run
    new_waiting_jobs = []
    for job in jobs_to_check:
        try:
            # Check the job's dependencies, requeue if they're not done.
            # Some of these states will only happen when using the in-memory job queue
            job_state = self.__check_job_state(job)
            if job_state == JOB_WAIT:
                new_waiting_jobs.append(job.id)
            elif job_state == JOB_INPUT_ERROR:
                log.info("(%d) Job unable to run: one or more inputs in error state" % job.id)
            elif job_state == JOB_INPUT_DELETED:
                log.info("(%d) Job unable to run: one or more inputs deleted" % job.id)
            elif job_state == JOB_READY:
                self.dispatcher.put(self.job_wrappers.pop(job.id))
                log.info("(%d) Job dispatched" % job.id)
            elif job_state == JOB_DELETED:
                log.info("(%d) Job deleted by user while still queued" % job.id)
            elif job_state == JOB_ADMIN_DELETED:
                log.info("(%d) Job deleted by admin while still queued" % job.id)
            elif job_state == JOB_USER_OVER_QUOTA:
                log.info("(%d) User (%s) is over quota: job paused" % (job.id, job.user_id))
                job.set_state(model.Job.states.PAUSED)
                for dataset_assoc in job.output_datasets + job.output_library_datasets:
                    dataset_assoc.dataset.dataset.state = model.Dataset.states.PAUSED
                    dataset_assoc.dataset.info = "Execution of this dataset's job is paused because you were over your disk quota at the time it was ready to run"
                    self.sa_session.add(dataset_assoc.dataset.dataset)
                self.sa_session.add(job)
            elif job_state == JOB_ERROR:
                log.error("(%d) Error checking job readiness" % job.id)
            else:
                log.error("(%d) Job in unknown state '%s'" % (job.id, job_state))
                new_waiting_jobs.append(job.id)
        except Exception:
            log.exception("failure running job %d" % job.id)

    # Update the waiting list
    if not self.track_jobs_in_database:
        self.waiting_jobs = new_waiting_jobs
    # Remove cached wrappers for any jobs that are no longer being tracked
    for id in list(self.job_wrappers.keys()):
        if id not in new_waiting_jobs:
            del self.job_wrappers[id]
    # Flush, if we updated the state
    self.sa_session.flush()
    # Done with the session
    self.sa_session.remove()
import uuid

from galaxy.jobs import (
    HasResourceParameters,
    JobDestination,
)
from galaxy.jobs.mapper import (
    ERROR_MESSAGE_NO_RULE_FUNCTION,
    ERROR_MESSAGE_RULE_FUNCTION_NOT_FOUND,
    JobRunnerMapper,
)
from galaxy.util import bunch

from . import test_rules

WORKFLOW_UUID = uuid.uuid1().hex
TOOL_JOB_DESTINATION = JobDestination()
DYNAMICALLY_GENERATED_DESTINATION = JobDestination()


def test_static_mapping():
    mapper = __mapper()
    assert mapper.get_job_destination({}) is TOOL_JOB_DESTINATION


def test_caching():
    mapper = __mapper()
    mapper.get_job_destination({})
    mapper.get_job_destination({})
    assert mapper.job_wrapper.tool.call_count == 1
def __check_jobs_at_startup( self ):
    """
    Checks all jobs that are in the 'new', 'queued' or 'running' state in
    the database and requeues or cleans up as necessary.  Only run as the
    job handler starts.
    In case the activation is enforced it will filter out the jobs of inactive users.
    """
    jobs_at_startup = []
    if self.track_jobs_in_database:
        in_list = ( model.Job.states.QUEUED,
                    model.Job.states.RUNNING )
    else:
        in_list = ( model.Job.states.NEW,
                    model.Job.states.QUEUED,
                    model.Job.states.RUNNING )
    if self.app.config.user_activation_on:
        jobs_at_startup = self.sa_session.query( model.Job ).enable_eagerloads( False ) \
            .outerjoin( model.User ) \
            .filter( model.Job.state.in_( in_list )
                     & ( model.Job.handler == self.app.config.server_name )
                     & or_( ( model.Job.user_id == None ), ( model.User.active == True ) ) ).all()
    else:
        jobs_at_startup = self.sa_session.query( model.Job ).enable_eagerloads( False ) \
            .filter( model.Job.state.in_( in_list )
                     & ( model.Job.handler == self.app.config.server_name ) ).all()

    for job in jobs_at_startup:
        if job.tool_id not in self.app.toolbox.tools_by_id:
            log.warning( "(%s) Tool '%s' removed from tool config, unable to recover job" % ( job.id, job.tool_id ) )
            self.job_wrapper( job ).fail( 'This tool was disabled before the job completed. Please contact your Galaxy administrator.' )
        elif job.job_runner_name is not None and job.job_runner_external_id is None:
            # This could happen during certain revisions of Galaxy where a runner URL was persisted before the job was dispatched to a runner.
            log.debug( "(%s) Job runner assigned but no external ID recorded, adding to the job handler queue" % job.id )
            job.job_runner_name = None
            if self.track_jobs_in_database:
                job.state = model.Job.states.NEW
            else:
                self.queue.put( ( job.id, job.tool_id ) )
        elif job.job_runner_name is not None and job.job_runner_external_id is not None and job.destination_id is None:
            # This is the first start after upgrading from URLs to destinations, convert the URL to a destination and persist
            job_wrapper = self.job_wrapper( job )
            job_destination = self.dispatcher.url_to_destination( job.job_runner_name )
            if job_destination.id is None:
                job_destination.id = 'legacy_url'
            job_wrapper.set_job_destination( job_destination, job.job_runner_external_id )
            self.dispatcher.recover( job, job_wrapper )
            log.info( '(%s) Converted job from a URL to a destination and recovered' % ( job.id ) )
        elif job.job_runner_name is None:
            # Never (fully) dispatched
            log.debug( "(%s) No job runner assigned and job still in '%s' state, adding to the job handler queue" % ( job.id, job.state ) )
            if self.track_jobs_in_database:
                job.state = model.Job.states.NEW
            else:
                self.queue.put( ( job.id, job.tool_id ) )
        else:
            # Already dispatched and running
            job_wrapper = self.job_wrapper( job )
            # Use the persisted destination as its params may differ from
            # what's in the job_conf xml
            job_destination = JobDestination( id=job.destination_id, runner=job.job_runner_name, params=job.destination_params )
            # resubmits are not persisted (it's a good thing) so they
            # should be added back to the in-memory destination on startup
            try:
                config_job_destination = self.app.job_config.get_destination( job.destination_id )
                job_destination.resubmit = config_job_destination.resubmit
            except KeyError:
                log.warning( '(%s) Recovered destination id (%s) does not exist in job config (but this may be normal in the case of a dynamically generated destination)', job.id, job.destination_id )
            job_wrapper.job_runner_mapper.cached_job_destination = job_destination
            self.dispatcher.recover( job, job_wrapper )

    if self.sa_session.dirty:
        self.sa_session.flush()
def __dynamic_destination(params={}):
    return JobDestination(runner="dynamic", params=params)