async def filter_grep(self, params, filter_expr, ignore_case=False, invert_match=False, line_regexp=False):
    """Filter parameters using a regular expression on parameter names."""
    # Options:
    #   ignore_case  -- compile the pattern with re.I.
    #   invert_match -- keep the parameters whose name does NOT match.
    #   line_regexp  -- require the pattern to match the whole name
    #                   (fullmatch) instead of any substring (search).
    # Returns a new list; the relative order of `params` is preserved.
    regex = re.compile(filter_expr, re.I if ignore_case else 0)
    matcher = regex.fullmatch if line_regexp else regex.search
    keep_on_match = not invert_match
    return [
        candidate for candidate in params
        if (matcher(get_name(candidate)) is not None) == keep_on_match
    ]
async def _handle_du(self):
    """summarize parameters with time information"""
    # One printed line per parameter:
    #   <param id, padded> <hash id> [<finished time>] [<duration>] <outcome> <name>
    # Missing timestamp/duration columns are replaced by blanks of equal width.
    params = await self._execute_chain('params')
    id_width = self._get_param_id_max_len(params)

    for param in params:
        info = param['_']
        pieces = [
            f'{get_param_id(param):{id_width}} ',
            f'{get_hash_id(param)} ',
        ]

        # Finish timestamp (fractional seconds dropped), optionally localized.
        finished_at = info['finished']
        if finished_at is None:
            pieces.append(' ' * (19 + 3))
        else:
            if self.args.local:
                finished_at = format_local(parse_utc(finished_at))
            pieces.append(f"[{finished_at.partition('.')[0]:>19}] ")

        # Duration column.
        duration = info['duration']
        if duration is None:
            pieces.append(' ' * (7 + 3))
        else:
            pieces.append(f"[{format_sec_short(duration):>7}] ")

        # Outcome column: unknown / succeeded / failed.
        outcome = info['succeeded']
        if outcome is None:
            pieces.append(' ')
        elif outcome:
            pieces.append('succeeded ')
        else:
            pieces.append('FAILED ')

        pieces.append(get_name(param))
        print(''.join(pieces))
def _get_state_fast(self):
    """Return a lightweight copy of the state with parameter details stripped.

    Must be called while ``self.lock`` is held.  Each job is shallow-copied
    and its ``'param'`` entry is replaced by a stub carrying only the
    parameter id, hash id and name.  The three job collections are returned
    as lists built from the values of the corresponding ``self._state``
    mappings.
    """
    assert self.lock.locked()

    def slim(job):
        # Shallow copy so the job stored in self._state is left untouched.
        trimmed = dict(job)
        full_param = trimmed['param']
        trimmed['param'] = {
            '_': {
                'param_id': get_param_id(full_param),
                'hash_id': get_hash_id(full_param),
            },
            '_name': get_name(full_param),
        }
        return trimmed

    concurrency = self._state['concurrency']
    next_job_id = self._state['next_job_id']

    snapshot = {
        group: [slim(job) for job in self._state[group].values()]
        for group in ('finished_jobs', 'started_jobs', 'queued_jobs')
    }
    snapshot['concurrency'] = concurrency
    snapshot['next_job_id'] = next_job_id
    return snapshot
async def _handle_d(self):
    """summarize parameters"""
    # One printed line per parameter: padded parameter id, hash id, name.
    params = await self._execute_chain('params')
    id_width = self._get_param_id_max_len(params)
    for param in params:
        prefix = f'{get_param_id(param):{id_width}} {get_hash_id(param)} '
        print(prefix + get_name(param))
def _construct_env(job, job_paths):
    """Build the environment passed to a job's process.

    Starts from a copy of the current ``os.environ`` and adds the
    ``EXPTOOLS_*`` variables describing the job, its parameter and the paths
    of its bookkeeping files.

    Args:
        job: Job mapping; ``job['param']`` and ``job['job_id']`` are read.
        job_paths: Mapping with the keys ``'job_dir'``, ``'job.json'``,
            ``'param.json'``, ``'resources.json'`` and ``'status.json'``.

    Returns:
        A new dict; ``os.environ`` itself is not modified.
    """
    param = job['param']

    env = dict(os.environ)
    env.update({
        'EXPTOOLS_JOB_DIR': job_paths['job_dir'],
        'EXPTOOLS_JOB_ID': job['job_id'],
        'EXPTOOLS_PARAM_ID': get_param_id(param),
        'EXPTOOLS_HASH_ID': get_hash_id(param),
        'EXPTOOLS_NAME': get_name(param),
        # Fall back to the current working directory when none is configured.
        'EXPTOOLS_CWD': get_cwd(param) or os.getcwd(),
        'EXPTOOLS_RETRY': str(get_retry(param)),
        'EXPTOOLS_RETRY_DELAY': str(get_retry_delay(param)),
        'EXPTOOLS_TIME_LIMIT': str(get_time_limit(param)),
        'EXPTOOLS_JOB_JSON_PATH': job_paths['job.json'],
        'EXPTOOLS_PARAM_JSON_PATH': job_paths['param.json'],
        'EXPTOOLS_RESOURCES_JSON_PATH': job_paths['resources.json'],
        'EXPTOOLS_STATUS_JSON_PATH': job_paths['status.json'],
    })
    return env
async def _handle_status(self):
    """show the queue state"""
    # NOTE(review): docstring left verbatim -- it may be surfaced as CLI help
    # text; confirm before rewording.
    #
    # Options are read from self.args (limit, similar, job_types,
    # clear_screen, stop_empty) and self.common_args (color).  One report is
    # printed for every queue state yielded by self._get_queue_state().
    limit = self.args.limit
    use_similar = self.args.similar

    # 'all' selects every section; anything else is converted to a set of
    # section names.
    if self.args.job_types == 'all':
        job_types = {'finished', 'started', 'queued'}
    else:
        job_types = set(self.args.job_types)

    estimator = Estimator(self.client.registry, self.client.history)
    use_color = self.common_args.color == 'yes'

    # `colored` has termcolor's call shape either way; the fallback returns
    # the text unchanged.
    if use_color:
        colored = termcolor.colored
    else:
        def colored(s, *args, **kwargs):  # pylint: disable=invalid-name,unused-argument
            """Use no color."""
            return s

    async for queue_state in self._get_queue_state():
        oneshot = await self.client.scheduler.is_oneshot()

        # The whole report is accumulated here and printed in one call.
        output = ''

        # Column widths are computed over every job, including sections that
        # are not displayed.
        all_jobs = (queue_state['finished_jobs'] + queue_state['started_jobs'] +
                    queue_state['queued_jobs'])
        all_params = [job['param'] for job in all_jobs]
        job_id_max_len = self._get_job_id_max_len(all_jobs)
        param_id_max_len = self._get_param_id_max_len(all_params)

        # --- Finished jobs -------------------------------------------------
        if 'finished' in job_types:
            succeeded_count = len([job for job in queue_state['finished_jobs'] if job['succeeded']])
            failed_count = len(queue_state['finished_jobs']) - succeeded_count
            # The header is red as soon as at least one job failed.
            finished_jobs_color = 'red' if failed_count else 'green'
            output += colored(
                f"Finished jobs (S:{succeeded_count} / F:{failed_count})",
                finished_jobs_color, attrs=['reverse']) + '\n'

            # Ellipsis goes first: only the LAST `limit` finished jobs are listed.
            if limit and len(queue_state['finished_jobs']) > limit:
                line = colored(' ', finished_jobs_color, attrs=['reverse'])
                output += line + ' ...\n'

            jobs = queue_state['finished_jobs']
            if limit:
                jobs = jobs[-limit:]

            for job in jobs:
                if job['succeeded']:
                    line = colored(' ', 'green', attrs=['reverse'])
                else:
                    line = colored(' ', 'red', attrs=['reverse'])

                param_id = get_param_id(job['param'])
                hash_id = get_hash_id(job['param'])
                name = get_name(job['param'])

                line += f" {job['job_id']:{job_id_max_len}} {param_id:{param_id_max_len}} {hash_id}"
                line += f' [{format_sec_short(job_elapsed_time(job)):>7}]'
                if job['succeeded']:
                    line += ' succeeded '
                else:
                    line += ' FAILED '
                line += f"{name}"
                output += line + '\n'

            output += '\n'

        # rem_map is indexed by job id below.  The estimate is computed even
        # when no section uses it, because remaining_time feeds the footer.
        remaining_time, rem_map = (
            await estimator.estimate_remaining_time(queue_state, False, use_similar))
        # Running baseline: each listed job shows the increase of its
        # remaining time over the previously listed job (clamped at 0).  It
        # carries over from the started section into the queued section.
        last_rem = 0.

        # --- Started jobs --------------------------------------------------
        if 'started' in job_types:
            output += colored(
                f"Started jobs (A:{len(queue_state['started_jobs'])})",
                'cyan', attrs=['reverse']) + '\n'

            # Ellipsis goes first: only the LAST `limit` started jobs are listed.
            if limit and len(queue_state['started_jobs']) > limit:
                line = colored(' ', 'cyan', attrs=['reverse'])
                output += line + ' ...\n'

            jobs = queue_state['started_jobs']
            if limit:
                jobs = jobs[-limit:]

            for job in jobs:
                rem = rem_map[job['job_id']]

                param_id = get_param_id(job['param'])
                hash_id = get_hash_id(job['param'])
                name = get_name(job['param'])

                line = colored(' ', 'cyan', attrs=['reverse'])
                line += f" {job['job_id']:{job_id_max_len}} {param_id:{param_id_max_len}} {hash_id}"
                # Elapsed time, then the incremental remaining-time estimate.
                line += f' [{format_sec_short(job_elapsed_time(job)):>7}]'
                line += f'+[{format_sec_short(max(rem - last_rem, 0)):>7}]'
                line += ' '
                last_rem = rem
                line += f"{name}"
                output += line + '\n'

            output += '\n'

        # --- Queued jobs ---------------------------------------------------
        if 'queued' in job_types:
            # The header shares its line with the scheduler status.
            output += colored(
                f"Queued jobs (Q:{len(queue_state['queued_jobs'])})",
                'blue', attrs=['reverse']) + ' '

            output += 'Scheduler: '
            if oneshot:
                output += colored('Oneshot', 'blue')
            elif await self.client.scheduler.is_running():
                output += colored('Running', 'cyan')
            else:
                output += colored('Stopped', 'red')
            output += '\n'

            # Only the FIRST `limit` queued jobs are listed; the ellipsis
            # therefore comes after them.
            jobs = queue_state['queued_jobs']
            if limit:
                jobs = jobs[:limit]

            for job in jobs:
                rem = rem_map[job['job_id']]

                param_id = get_param_id(job['param'])
                hash_id = get_hash_id(job['param'])
                name = get_name(job['param'])

                line = colored(' ', 'blue', attrs=['reverse'])
                line += f" {job['job_id']:{job_id_max_len}} {param_id:{param_id_max_len}} {hash_id}"
                line += f' [{format_sec_short(max(rem - last_rem, 0)):>7}]'
                line += ' '
                last_rem = rem
                line += f"{name}"
                output += line + '\n'

            if limit and len(queue_state['queued_jobs']) > limit:
                line = colored(' ', 'blue', attrs=['reverse'])
                output += line + ' ...\n'

            output += '\n'

        # output += f"Concurrency: {queue_state['concurrency']}"

        # Footer estimate.  NOTE(review): the second positional argument is
        # True only for the oneshot re-estimate -- presumably an
        # oneshot-mode flag; confirm against Estimator.
        if not oneshot:
            # reuse remaining_time
            pass
        else:
            remaining_time, _ = await estimator.estimate_remaining_time(queue_state, True, use_similar)
        output += await format_estimated_time(remaining_time, queue_state, use_color) + '\n'

        if self.args.clear_screen:
            os.system('clear')
        print(output)

        # Stop following the queue once nothing is running and either the
        # scheduler is in oneshot mode or nothing is queued.
        if (self.args.stop_empty and
                not queue_state['started_jobs'] and (oneshot or not queue_state['queued_jobs'])):
            break
async def _try(self, job, job_id, param, current_retry):
    """Run a job once and report whether it succeeded.

    Prepares the job directory and files, launches the parameter's command
    as a subprocess with the job's stdio files and environment, and waits for
    it (bounded by the parameter's time limit when that limit is positive).
    A process that is still alive afterwards is sent SIGTERM and, if it has
    not exited within 10 seconds, SIGKILL.

    Args:
        job: Job mapping handed to the output helpers and ``_construct_env``.
        job_id: Identifier used for logging, PID registration and status.
        param: Parameter mapping; also supplies the ``str.format`` fields
            used to expand the command arguments.
        current_retry: Retry index forwarded to ``make_job_directory``.

    Returns:
        True only when the process exited with return code 0; False on a
        non-zero exit, a timeout, or any exception other than cancellation
        (such exceptions are logged, not raised).

    Raises:
        asyncio.CancelledError: Propagated when this coroutine is cancelled.
    """
    param_id = get_param_id(param)
    hash_id = get_hash_id(param)
    name = get_name(param)
    expanded_command = [arg.format(**param) for arg in get_command(param)]
    cwd = get_cwd(param) or os.getcwd()
    time_limit = get_time_limit(param)

    succeeded = False

    try:
        self.logger.info(f'Launching job {job_id}: {name}')

        job_paths = await self.output.make_job_directory(job, current_retry)
        job_paths = await self.output.create_job_files(job, job_paths)

        env = self._construct_env(job, job_paths)

        with self.output.open_job_stdio(job_paths) as stdio:
            stdout, stderr = stdio

            await self.output.make_tmp_symlinks(param_id, hash_id, job_paths)

            # Launch process.  No explicit ``loop=``: the parameter was
            # removed from these asyncio APIs in Python 3.10, and inside a
            # coroutine the running loop is used automatically.
            proc = await asyncio.create_subprocess_exec(
                *expanded_command,
                cwd=cwd,
                stdin=asyncio.subprocess.DEVNULL,
                stdout=stdout,
                stderr=stderr,
                env=env)

            await self.queue.set_pid(job_id, proc.pid)

            # Watch status changes
            status_task = asyncio.ensure_future(
                self._watch_status(job_id, job_paths))

            try:
                # A non-positive time limit means "wait without a limit".
                if time_limit <= 0:
                    await proc.communicate()
                else:
                    await asyncio.wait_for(proc.communicate(), time_limit)

            except asyncio.TimeoutError:
                self.logger.error(f'Timeout while waiting for job {job_id}')

            finally:
                status_task.cancel()
                try:
                    await status_task
                except asyncio.CancelledError:
                    # Ignore CancelledError because we caused it.
                    # asyncio.CancelledError is the class actually raised
                    # here; since Python 3.8 it is no longer the same class
                    # as concurrent.futures.CancelledError (and on older
                    # versions the two names are aliases).
                    pass

                # Done in ``finally`` so the child is also terminated when
                # this coroutine is cancelled or the wait timed out.
                if proc.returncode is None:
                    try:
                        proc.send_signal(signal.SIGTERM)
                    except Exception:  # pylint: disable=broad-except
                        self.logger.exception('Exception while killing process')

                    try:
                        # Grace period before escalating to SIGKILL.
                        await asyncio.wait_for(proc.wait(), 10)
                    except Exception:  # pylint: disable=broad-except
                        self.logger.exception('Exception while waiting for process')

                if proc.returncode is None:
                    try:
                        proc.send_signal(signal.SIGKILL)
                    except Exception:  # pylint: disable=broad-except
                        self.logger.exception('Exception while killing process')

                    try:
                        await proc.wait()
                    except Exception:  # pylint: disable=broad-except
                        self.logger.exception('Exception while waiting for process')

            # Read status before making the job finished
            await self._read_status(job_id, job_paths)

            if proc.returncode == 0:
                await self.output.make_symlinks(param_id, hash_id, job_paths)
                succeeded = True

    except asyncio.CancelledError:
        # Pass through
        raise

    except Exception:  # pylint: disable=broad-except
        self.logger.exception(f'Exception while running job {job_id}')

    finally:
        await self.output.remove_tmp_symlinks(param_id, hash_id)

    return succeeded