async def set_finished(self, job_id, succeeded):
    """Mark a started job as finished."""
    now = format_utc(utcnow())
    async with self.lock:
        if job_id not in self._state['started_jobs']:
            return False

        job = self._state['started_jobs'][job_id]
        job['finished'] = now
        job['duration'] = (
            diff_sec(parse_utc(now), parse_utc(job['started'])))
        job['pid'] = None
        job['succeeded'] = succeeded

        if succeeded:
            await self.history.update(job)

        self._state['finished_jobs'][job_id] = job
        del self._state['started_jobs'][job_id]

        if succeeded:
            self.logger.info(f'Finished job {job_id} [succeeded]')
        else:
            self.logger.warning(f'Finished job {job_id} [FAILED]')

        self.lock.notify_all()
        self._check_empty()
        self._schedule_dump()
        return True
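# For context, a minimal sketch of the move-and-notify pattern used above,
# assuming `self.lock` is an asyncio.Condition (the class and field names
# here are illustrative, not the actual implementation):
import asyncio

class _MiniQueue:
    def __init__(self):
        self.lock = asyncio.Condition()
        self.started_jobs = {'j-0': {'param': {}}}
        self.finished_jobs = {}

    async def finish(self, job_id):
        async with self.lock:
            # Move the job between states while holding the lock, then wake
            # any coroutines waiting on the queue (e.g. an "is empty" waiter)
            self.finished_jobs[job_id] = self.started_jobs.pop(job_id)
            self.lock.notify_all()

asyncio.run(_MiniQueue().finish('j-0'))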
async def _handle_du(self):
    """Summarize parameters with time information."""
    params = await self._execute_chain('params')
    param_id_max_len = self._get_param_id_max_len(params)
    for param in params:
        meta = param['_']
        line = f'{get_param_id(param):{param_id_max_len}} '
        line += f'{get_hash_id(param)} '

        if meta['finished'] is not None:
            finished = meta['finished']
            if self.args.local:
                finished = format_local(parse_utc(finished))
            line += f"[{finished.partition('.')[0]:>19}] "
        else:
            line += ' ' * (19 + 3)

        if meta['duration'] is not None:
            line += f"[{format_sec_short(meta['duration']):>7}] "
        else:
            line += ' ' * (7 + 3)

        if meta['succeeded'] is None:
            line += ' ' * 10
        elif meta['succeeded']:
            line += 'succeeded '
        else:
            line += 'FAILED    '

        line += get_name(param)
        print(line)
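# The column layout above relies on f-string width/alignment specifiers and
# `str.partition` to drop microseconds; a standalone illustration with
# made-up values:
param_id = 'p-00042'
finished = '2000-01-02 03:04:05.678901'
duration = '1h 2m'
print(f'{param_id:12} [{finished.partition(".")[0]:>19}] '
      f'[{duration:>7}] succeeded my-param')
# p-00042      [2000-01-02 03:04:05] [  1h 2m] succeeded my-param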
async def estimate_remaining_time(self, state, oneshot, use_similar):
    """Estimate the remaining time using the queue state."""
    now = utcnow()

    epsilon = 0.001  # Potential underestimation until the progress reaches 0.1%

    # Future parallelism cannot be higher than the remaining job count
    concurrency = max(
        1.,
        min(state['concurrency'],
            len(state['started_jobs']) + len(state['queued_jobs'])))

    hash_ids = await self.history.hash_ids()
    history_list = await self.history.history_list(hash_ids)
    history_map = dict(zip(hash_ids, history_list))

    if use_similar:
        fid_mapping, fvec_map = await self._make_fid_mapping(
            history_map, hash_ids)

    # Estimate average per-job duration
    known_hash_ids = set()
    known_duration = 0.
    known_count = 0

    # Consider recent jobs first (in case some jobs have duplicate hash_id)
    for job in reversed(state['started_jobs']):
        hash_id = get_hash_id(job['param'])
        if hash_id in known_hash_ids:
            continue
        known_hash_ids.add(hash_id)

        if job['started'] is None:
            started = now
        else:
            started = parse_utc(job['started'])

        if job.get('status') and job['status'].get('progress', 0.) >= epsilon:
            known_duration += (
                diff_sec(now, started) / job['status']['progress'])
            known_count += 1

    for hash_id, history in history_map.items():
        if hash_id in known_hash_ids:
            continue
        known_hash_ids.add(hash_id)

        if history['duration'] is not None:
            known_duration += history['duration']
            known_count += 1

    avg_duration = known_duration / max(known_count, 1)

    remaining_time_map = {}
    for job in state['finished_jobs']:
        remaining_time_map[job['job_id']] = 0.

    # Calculate started jobs' remaining time
    remaining_duration = 0.
    for job in state['started_jobs']:
        hash_id = get_hash_id(job['param'])
        history = history_map.get(hash_id, None)

        if job['started'] is None:
            started = now
        else:
            started = parse_utc(job['started'])

        if job.get('status') and job['status'].get('progress', 0.) >= epsilon:
            exp_duration = (
                diff_sec(now, started) / job['status']['progress'])
            remaining_duration += max(
                exp_duration - diff_sec(now, started), 0.)
        elif history and history['duration'] is not None:
            remaining_duration += max(
                history['duration'] - diff_sec(now, started), 0.)
        else:
            if use_similar:
                exp_duration = (await self._find_closest_duration(
                    history_map, fid_mapping, fvec_map, job['param']))
                if exp_duration is None:
                    exp_duration = avg_duration
            else:
                exp_duration = avg_duration
            remaining_duration += max(
                exp_duration - diff_sec(now, started), 0.)

        # Take into account concurrency
        remaining_time_map[
            job['job_id']] = remaining_duration / concurrency

    # Calculate queued jobs' remaining time
    if not oneshot:
        for job in state['queued_jobs']:
            hash_id = get_hash_id(job['param'])
            history = history_map.get(hash_id, None)

            if history and history['duration'] is not None:
                remaining_duration += history['duration']
            else:
                if use_similar:
                    exp_duration = (await self._find_closest_duration(
                        history_map, fid_mapping, fvec_map, job['param']))
                    if exp_duration is None:
                        exp_duration = avg_duration
                else:
                    exp_duration = avg_duration
                remaining_duration += exp_duration

            # Take into account concurrency
            remaining_time_map[
                job['job_id']] = remaining_duration / concurrency
    else:
        # In oneshot mode, queued jobs will not run
        for job in state['queued_jobs']:
            remaining_time_map[
                job['job_id']] = remaining_duration / concurrency

    # Take into account concurrency
    remaining_time = remaining_duration / concurrency

    return remaining_time, remaining_time_map
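# The progress-based extrapolation used above, in isolation (numbers are
# made up): a job that is 25% done after 120 s is expected to take
# 120 / 0.25 = 480 s total, leaving 360 s.
elapsed = 120.
progress = 0.25
expected_total = elapsed / progress
remaining = max(expected_total - elapsed, 0.)
assert remaining == 360.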
def test_diff_sec():
    t1 = parse_utc('2000-01-02 03:04:05.678901')
    t2 = parse_utc('2000-01-02 02:03:04.678900')
    assert diff_sec(t1, t2) == 3600 + 60 + 1 + 0.000001
def test_parse_utc_format_utc_short():
    assert format_utc_short(
        parse_utc('2000-01-02 03:04:05.678901')) == '2000-01-02 03:04:05'
def test_as_utc_as_local_as_utc():
    t = parse_utc('2000-01-02 03:04:05.678901')
    assert t == as_utc(as_local(t))
    t = parse_local('2000-01-02 03:04:05.678901')
    assert t == as_local(as_utc(t))
def test_as_utc():
    t = parse_utc('2000-01-02 03:04:05.678901')
    assert t == as_utc(t)
def test_job_elapsed_time_started_job(mock_utcnow):
    mock_utcnow.return_value = parse_utc('2000-01-02 03:04:15.678901')
    job = {'finished': None, 'started': '2000-01-02 03:04:05.678901'}
    assert job_elapsed_time(job) == 10.
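# The test above assumes a `mock_utcnow` fixture that patches the time
# module's utcnow(); a minimal sketch (the patch target path is a
# placeholder for wherever utcnow() actually lives):
import pytest
from unittest import mock

@pytest.fixture
def mock_utcnow():
    with mock.patch('exptools.time.utcnow') as mocked:
        yield mocked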