def start_queue(self, run_context, job_queue):
    """Queue every active realization and run the job queue to completion.

    Each active entry of ``run_context`` is added to ``job_queue`` together
    with the ``EnKFState`` done/exit callbacks, the queue is told that
    submission is complete, and a ``JobQueueManager`` then executes it.

    :param run_context: iterable of run arguments; must also provide
        ``is_active(index)``.
    :param job_queue: queue the jobs are added to and executed on.
    """
    analysis_config = self._enkf_main().analysisConfig()

    # A configured max runtime of 0 is handed on as None
    # (presumably "no limit" -- confirm against the queue implementation).
    configured_runtime = analysis_config.get_max_runtime()
    max_runtime = None if configured_runtime == 0 else configured_runtime

    on_done = EnKFState.forward_model_ok_callback
    on_exit = EnKFState.forward_model_exit_callback

    # Submit one job per active realization.
    for index, run_arg in enumerate(run_context):
        if run_context.is_active(index):
            job_queue.add_job_from_run_arg(
                run_arg,
                self._enkf_main().resConfig(),
                max_runtime,
                on_done,
                on_exit,
            )

    job_queue.submit_complete()

    # The long-running-job evaluator is only installed when the feature is
    # switched on and a positive number of realizations is required.
    queue_evaluators = None
    if analysis_config.get_stop_long_running():
        min_realizations = analysis_config.minimum_required_realizations
        if min_realizations > 0:
            queue_evaluators = [
                partial(job_queue.stop_long_running_jobs, min_realizations)
            ]

    JobQueueManager(job_queue, queue_evaluators).execute_queue()
def start_queue(self, run_context, job_queue):
    """Add all active realizations to ``job_queue`` and execute the queue.

    Jobs are added through ``self.add_job``. When stopping of long running
    jobs is enabled and at least one realization is required,
    ``EnkfSimulationRunner.stop_long_running_jobs`` is registered as a
    queue evaluator, bound to ``job_queue`` and the required count.

    :param run_context: indexable collection of run arguments providing
        ``len()`` and ``is_active(index)``.
    :param job_queue: queue the jobs are added to and executed on.
    """
    analysis_config = self._enkf_main().analysisConfig()

    # A configured max runtime of 0 is forwarded as None
    # (presumably "no limit" -- confirm against add_job).
    configured_runtime = analysis_config.get_max_runtime()
    max_runtime = None if configured_runtime == 0 else configured_runtime

    # Submit one job per active realization.
    for iens in range(len(run_context)):
        if run_context.is_active(iens):
            self.add_job(
                run_context[iens],
                self._enkf_main().resConfig(),
                job_queue,
                max_runtime,
            )

    job_queue.submit_complete()

    queue_evaluators = None
    if analysis_config.get_stop_long_running():
        min_realizations = analysis_config.minimum_required_realizations
        if min_realizations > 0:
            queue_evaluators = [
                partial(
                    EnkfSimulationRunner.stop_long_running_jobs,
                    job_queue,
                    min_realizations,
                )
            ]

    JobQueueManager(job_queue, queue_evaluators).execute_queue()
def __init__(self, ert, sim_fs, mask, itr, case_data):
    """Set up the run context and queue, then start the simulations.

    Creates a job queue limited by the configured max runtime, builds an
    ``ENSEMBLE_EXPERIMENT`` run context, copies the ``geo_id`` of every
    masked-in case onto its run argument, creates the run paths and starts
    the simulations via ``_run_simulations_simple_step``.

    The constructor does not return until either the queue manager reports
    that it is running or ``self.isRunning()`` turns false.

    :param ert: the EnKFMain instance (``res.enkf.EnKFMain``).
    :param sim_fs: file system object passed to the run context.
    :param mask: indexable by simulation id; truthy entries are active.
    :param itr: iteration number passed to the run context.
    :param case_data: sequence of 2-item entries whose first item is the
        ``geo_id`` of the simulation at that position.
    """
    # :type: res.enkf.EnKFMain
    self._ert = ert
    max_runtime = ert.analysisConfig().get_max_runtime()
    self._mask = mask

    job_queue = ert.get_queue_config().create_job_queue()
    job_queue.set_max_job_duration(max_runtime)
    self._queue_manager = JobQueueManager(job_queue)

    subst_list = self._ert.getDataKW()
    path_fmt = self._ert.getModelConfig().getRunpathFormat()
    jobname_fmt = self._ert.getModelConfig().getJobnameFormat()
    self._run_context = ErtRunContext(
        EnkfRunType.ENSEMBLE_EXPERIMENT,
        sim_fs,
        None,
        mask,
        path_fmt,
        jobname_fmt,
        subst_list,
        itr,
    )

    # Fill in the geo_id that the run context does not know about.
    for sim_id, case in enumerate(case_data):
        geo_id, _ = case
        if mask[sim_id]:
            self._run_context[sim_id].geo_id = geo_id

    self._ert.getEnkfSimulationRunner().createRunPath(self._run_context)
    self._sim_thread = self._run_simulations_simple_step()

    # Hold the caller until the queue is active, so that job status
    # queries made right after construction are sane.
    while True:
        if not self.isRunning() or self._queue_manager.isRunning():
            break
        sleep(0.1)
def test_kill_queue(self):
    """Jobs killed before the queue is executed must all end up FAILED."""
    with TestAreaContext("job_queue_manager_test"):
        max_submit_num = 5
        queue = create_queue(simple_script, max_submit=max_submit_num)
        queue_manager = JobQueueManager(queue)

        # Kill first, then execute: the manager should not revive the jobs.
        queue.kill_all_jobs()
        queue_manager.execute_queue()

        for queued_job in queue.job_list:
            assert queued_job.status == JobStatusType.JOB_QUEUE_FAILED
def test_execute_queue(self):
    """Executing the queue leaves an "OK" file reading "success" per job."""
    with TestAreaContext("job_queue_manager_test"):
        queue = create_queue(simple_script)
        JobQueueManager(queue).execute_queue()

        self.assertFalse(queue.isRunning())

        for queued_job in queue.job_list:
            ok_path = os.path.realpath(
                os.path.join(queued_job.run_path, "OK")
            )
            assert os.path.isfile(ok_path)
            with open(ok_path, 'r') as ok_handle:
                content = ok_handle.read()
            assert content == "success"
def test_max_submit_reached(self):
    """A failing job is submitted exactly ``max_submit`` times, then FAILED."""
    with TestAreaContext("job_queue_manager_test"):
        max_submit_num = 5
        queue = create_queue(failing_script, max_submit=max_submit_num)

        queue_manager = JobQueueManager(queue)
        queue_manager.execute_queue()
        self.assertFalse(queue_manager.isRunning())

        # Make sure the queue really was configured with max_submit_num.
        assert queue.max_submit == max_submit_num

        for queued_job in queue.job_list:
            assert queued_job.status == JobStatusType.JOB_QUEUE_FAILED
            assert queued_job.submit_attempt == queue.max_submit
def start_queue(self, run_context, job_queue):
    """Add all active realizations to ``job_queue`` and execute the queue.

    After submission is marked complete, the configured max runtime is
    applied to the queue as its max job duration and a ``JobQueueManager``
    executes the queue.

    :param run_context: indexable collection of run arguments providing
        ``len()`` and ``is_active(index)``.
    :param job_queue: queue the jobs are added to and executed on.
    """
    # Submit one job per active realization.
    for iens in range(len(run_context)):
        if run_context.is_active(iens):
            self.add_job(
                run_context[iens],
                self._enkf_main().resConfig(),
                job_queue,
            )

    job_queue.submit_complete()

    job_queue.set_max_job_duration(
        self._enkf_main().analysisConfig().get_max_runtime()
    )

    JobQueueManager(job_queue).execute_queue()
def __init__(self, ert, size, verbose=False):
    """Create a job queue from ``ert``, start it and prepare job submission.

    A ``JobQueueManager`` is built around a freshly created job queue and
    started for ``size`` jobs. An 8-slot ``CThreadPool`` is created with
    the ``enkf_main_isubmit_job__`` function of ``ENKF_LIB`` registered
    under the task name "submitJob".

    :param ert: the EnKFMain instance (``res.enkf.EnKFMain``).
    :param size: number of jobs the queue is started with.
    :param verbose: forwarded to ``JobQueueManager.startQueue``.
    """
    # :type: res.enkf.EnKFMain
    self._ert = ert
    self._size = size

    # NOTE(review): the max runtime is read here but never applied to the
    # queue in this constructor -- confirm whether that is intended.
    max_runtime = ert.analysisConfig().get_max_runtime()

    queue_config = ert.get_queue_config()
    self._queue_manager = JobQueueManager(queue_config.create_job_queue())
    self._queue_manager.startQueue(size, verbose=verbose)

    # :type: dict[int, RunArg]
    self._run_args = {}

    self._thread_pool = CThreadPool(8)
    self._thread_pool.addTaskFunction(
        "submitJob",
        ENKF_LIB,
        "enkf_main_isubmit_job__",
    )
def __init__(self, ert, size, verbose=False):
    """Refuse construction: no job queue instance is available any more.

    This constructor used to wrap a job queue in a ``JobQueueManager``,
    start it for ``size`` jobs and set up an 8-slot ``CThreadPool`` with a
    "submitJob" task. The job queue can no longer be obtained here, so the
    constructor always raises. The statements that followed the ``raise``
    could never run and have been removed, along with the unused max
    runtime lookup.

    :param ert: the EnKFMain instance (``res.enkf.EnKFMain``).
    :param size: number of jobs the queue was meant to be started with.
    :param verbose: accepted for interface compatibility; unused.
    :raises Exception: always, until access to a job queue is restored.
    """
    # :type: res.enkf.EnKFMain
    self._ert = ert
    self._size = size

    raise Exception(
        "Code has lost access to job_queue instance. Refactor required.")
def __init__(self, ert, init_fs, result_fs, mask, itr, verbose=False):
    """Start a job queue sized by ``mask`` and build the run context.

    A ``JobQueueManager`` is built around a freshly created job queue and
    started for ``mask.count()`` jobs. An 8-slot ``CThreadPool`` is created
    with the ``enkf_main_isubmit_job__`` function of ``ENKF_LIB``
    registered under the task name "submitJob". Finally an
    ``ENSEMBLE_EXPERIMENT`` run context is created.

    :param ert: the EnKFMain instance (``res.enkf.EnKFMain``).
    :param init_fs: file system object passed to the run context.
    :param result_fs: file system object passed to the run context.
    :param mask: realization mask; ``mask.count()`` sizes the queue.
    :param itr: iteration number passed to the run context.
    :param verbose: forwarded to ``JobQueueManager.startQueue``.
    """
    # :type: res.enkf.EnKFMain
    self._ert = ert

    # NOTE(review): the max runtime is read here but never applied to the
    # queue in this constructor -- confirm whether that is intended.
    max_runtime = ert.analysisConfig().get_max_runtime()
    self._mask = mask

    queue_config = ert.get_queue_config()
    self._queue_manager = JobQueueManager(queue_config.create_job_queue())
    self._queue_manager.startQueue(mask.count(), verbose=verbose)

    # :type: dict[int, RunArg]
    self._run_args = {}

    self._thread_pool = CThreadPool(8)
    self._thread_pool.addTaskFunction(
        "submitJob",
        ENKF_LIB,
        "enkf_main_isubmit_job__",
    )

    subst_list = self._ert.getDataKW()
    path_fmt = self._ert.getModelConfig().getRunpathFormat()
    self._run_context = ErtRunContext(
        EnkfRunType.ENSEMBLE_EXPERIMENT,
        init_fs,
        result_fs,
        None,
        mask,
        path_fmt,
        subst_list,
        itr,
    )