def delete_running(self, timeout_last_refresh=0, dry_run=False):
    """Delete jobs stalled in the running state for too long.

    A running job is treated as stalled when the current coarse UTC time is
    later than its ``refresh_time`` plus ``timeout_last_refresh`` seconds.

    Args:
        timeout_last_refresh: int, number of seconds a running job may go
            without a refresh before it is considered stalled.
        dry_run: when true, the stalled jobs are only logged; nothing is
            deleted through ``self.handle``.

    Returns:
        None. ``self.refresh_tids(None)`` is called before returning, whether
        or not any stalled job was found.
    """
    running_all = self.handle.jobs_running()

    # Take one timestamp and one timeout so that every job is measured against
    # the same cutoff, and so the horizon logged below is exactly the one that
    # was used for filtering.
    now = coarse_utcnow()
    timeout = timedelta(seconds=timeout_last_refresh)
    running_timeout = [
        job for job in running_all
        if now > job["refresh_time"] + timeout
    ]

    if not running_timeout:
        # Nothing to stop
        self.refresh_tids(None)
        return None

    if dry_run:
        logger.warning("Dry run. Not removing anything.")

    logger.info("Removing {0}/{1} running jobs. # all jobs: {2} ".format(
        len(running_timeout), len(running_all), len(self)))
    logger.info("Current utc time: {0}".format(now))
    logger.info("Time horizont: {0}".format(now - timeout))

    for job in running_timeout:
        logger.info("Removing job: ")
        pjob = job.to_dict()
        # Ignore misc when printing; tolerate jobs that carry no "misc" entry
        # so a malformed job cannot abort the clean-up half-way through.
        pjob.pop("misc", None)
        logger.info(pprint.pformat(pjob))
        if not dry_run:
            self.handle.delete(job)
            logger.info("Job deleted")

    self.refresh_tids(None)
def serial_evaluate(self, N=-1):
    """Evaluate pending trials one after another in the calling thread.

    Walks ``self.trials._dynamic_trials`` and, for every trial whose state is
    ``base.JOB_STATE_NEW``, marks it running, evaluates it through
    ``self.domain.evaluate`` and records either the result or the error on
    the trial document.

    Args:
        N: counter decremented once per evaluated trial; the loop stops when
            it reaches exactly 0. With the default of -1 it never reaches 0,
            so every new trial is evaluated.

    Raises:
        Exception: whatever the evaluation raised, re-raised after the trial
            has been marked as failed and the trials refreshed, when
            ``self.catch_eval_exceptions`` is false.
    """
    for doc in self.trials._dynamic_trials:
        if doc['state'] != base.JOB_STATE_NEW:
            continue

        doc['state'] = base.JOB_STATE_RUNNING
        started = coarse_utcnow()
        doc['book_time'] = started
        doc['refresh_time'] = started

        spec = base.spec_from_misc(doc['misc'])
        ctrl = base.Ctrl(self.trials, current_trial=doc)
        try:
            outcome = self.domain.evaluate(spec, ctrl)
        except Exception as exc:
            logger.info('job exception: %s', str(exc))
            doc['state'] = base.JOB_STATE_ERROR
            doc['misc']['error'] = (str(type(exc)), str(exc))
            doc['refresh_time'] = coarse_utcnow()
            if not self.catch_eval_exceptions:
                # -- JOB_STATE_ERROR means this trial
                #    will be removed from self.trials.trials
                #    by this refresh call.
                self.trials.refresh()
                raise
        else:
            doc['state'] = base.JOB_STATE_DONE
            doc['result'] = outcome
            doc['refresh_time'] = coarse_utcnow()

        # Failed evaluations that were caught count against N as well.
        N -= 1
        if N == 0:
            break

    self.trials.refresh()
def _evaluate_one(self, trial):
    """Evaluate a single trial document in place if it is still new.

    Trials whose state is not ``hyperopt.JOB_STATE_NEW`` are left untouched.
    Otherwise the trial is marked running, evaluated through the ``'domain'``
    entry of ``self.attachments``, and updated with either the result or the
    error details. Exceptions raised by the evaluation are not propagated;
    they are stored on the trial instead.

    Args:
        trial: trial document (mapping) with at least ``'state'`` and
            ``'misc'`` entries.
    """
    if trial['state'] != hyperopt.JOB_STATE_NEW:
        return

    trial['state'] = hyperopt.JOB_STATE_RUNNING
    # Booking and first refresh share the same timestamp.
    trial['book_time'] = trial['refresh_time'] = coarse_utcnow()

    spec = hyperopt.base.spec_from_misc(trial['misc'])
    ctrl = hyperopt.base.Ctrl(self, current_trial=trial)

    try:
        outcome = self.attachments['domain'].evaluate(spec, ctrl)
    except Exception as err:
        trial['state'] = hyperopt.JOB_STATE_ERROR
        trial['misc']['error'] = (str(type(err)), str(err))
        # The exception object itself is kept next to its string form.
        trial['misc']['e'] = err
    else:
        trial['state'] = hyperopt.JOB_STATE_DONE
        trial['result'] = outcome

    # Success and failure both end by stamping the refresh time.
    trial['refresh_time'] = coarse_utcnow()
def add_trial_from_json(trials, result):
    """Append an already-finished trial built from ``result`` to ``trials``.

    Args:
        trials: object exposing a ``trials`` list; the new trial document is
            appended to it and its current length is used as the trial id.
        result: mapping with a ``'params'`` mapping and a ``'scores'``
            iterable. All of its entries are copied into the trial's result
            and override the default ``'status'`` / ``'loss'`` values.

    The default loss is the negated maximum of ``result['scores']``.
    """
    params = result['params']
    scores = result['scores']
    tid = len(trials.trials)

    # Defaults first, so that keys present in `result` take precedence.
    trial_result = {'status': STATUS_OK, 'loss': -max(scores)}
    trial_result.update(result)

    misc = {
        'tid': tid,
        'vals': {name: [value] for name, value in params.items()},
        'idxs': {name: [tid] for name in params},
        'cmd': ('domain_attachment', 'FMinIter_Domain'),
        'workdir': None,
    }

    document = {
        'tid': tid,
        'state': JOB_STATE_DONE,
        'result': trial_result,
        'misc': misc,
        'spec': None,
        'exp_key': None,
        'book_time': coarse_utcnow(),
        'refresh_time': coarse_utcnow(),
    }
    trials.trials.append(document)
def evaluate(mth, run_i, seed):
    """Run one TPE optimisation and collect per-trial configs, losses and times.

    Relies on names from the enclosing scope that are not defined here:
    ``problem`` (provides ``evaluate_config``), ``cs`` (passed to ``fmin`` as
    its second positional argument), ``max_runs`` (fourth positional
    argument) and ``np``.

    Args:
        mth: method label, only printed.
        run_i: run index, only printed.
        seed: seed for the ``np.random.RandomState`` handed to ``fmin``.

    Returns:
        Tuple ``(config_list, perf_list, time_list)`` with, per trial, its
        ``misc['vals']``, its ``result['loss']`` and the seconds elapsed
        between the start of this call and the trial's ``refresh_time``.
    """
    print(mth, run_i, seed, '===== start =====', flush=True)

    def tpe_objective_function(config):
        return problem.evaluate_config(config, optimizer='tpe')

    from hyperopt import tpe, fmin, Trials
    from hyperopt.utils import coarse_utcnow

    started_at = coarse_utcnow()
    trials = Trials()
    fmin(
        tpe_objective_function,
        cs,
        tpe.suggest,
        max_runs,
        trials=trials,
        rstate=np.random.RandomState(seed),
    )

    records = trials.trials
    config_list = [rec['misc']['vals'] for rec in records]
    perf_list = [rec['result']['loss'] for rec in records]
    time_list = [
        (rec['refresh_time'] - started_at).total_seconds()
        for rec in records
    ]
    return config_list, perf_list, time_list
def _write_cancellation_back(trial, e):
    """Mark ``trial`` as cancelled and record the exception ``e`` on it.

    Args:
        trial: trial document (mapping) with a ``"misc"`` mapping.
        e: the exception associated with the cancellation; its type and
            message are stored as strings under ``trial["misc"]["error"]``.
    """
    trial["state"] = base.JOB_STATE_CANCEL
    exc_type, exc_message = str(type(e)), str(e)
    trial["misc"]["error"] = (exc_type, exc_message)
    finished_at = coarse_utcnow()
    trial["refresh_time"] = finished_at
def _write_exception_back(self, trial, e):
    """Mark ``trial`` as failed and record details of the exception ``e``.

    Args:
        trial: trial document (mapping) with a ``"misc"`` mapping.
        e: the exception raised for this trial. Its type (as a string) and
            the value returned by ``self._get_traceback(e)`` are stored under
            ``trial["misc"]["error"]``.
    """
    trial["state"] = base.JOB_STATE_ERROR
    exc_type = str(type(e))
    # NOTE(review): presumably a formatted traceback string - confirm
    # against _get_traceback.
    exc_details = self._get_traceback(e)
    trial["misc"]["error"] = (exc_type, exc_details)
    finished_at = coarse_utcnow()
    trial["refresh_time"] = finished_at
def _write_result_back(trial, result):
    """Mark ``trial`` as done and attach ``result`` to it.

    Args:
        trial: trial document (mapping) updated in place.
        result: value stored under ``trial["result"]``.
    """
    trial["state"] = base.JOB_STATE_DONE
    trial["result"] = result
    finished_at = coarse_utcnow()
    trial["refresh_time"] = finished_at
def _begin_trial_run(trial):
    """Mark ``trial`` as running and stamp its booking and refresh times.

    Args:
        trial: trial document (mapping) with a ``"tid"`` entry; updated in
            place. ``"book_time"`` and ``"refresh_time"`` receive the same
            timestamp.
    """
    trial["state"] = base.JOB_STATE_RUNNING
    trial["book_time"] = trial["refresh_time"] = coarse_utcnow()
    message = "trial task {tid} started".format(tid=trial["tid"])
    logger.debug(message)
def _write_cancellation_back(trial, e):
    """Flag ``trial`` as cancelled and store the exception ``e`` on it.

    Args:
        trial: trial document (mapping) with a ``'misc'`` mapping.
        e: the exception associated with the cancellation; its type and
            message are stored as strings under ``trial['misc']['error']``.
    """
    trial['state'] = base.JOB_STATE_CANCEL
    error_entry = (str(type(e)), str(e))
    trial['misc']['error'] = error_entry
    stamp = coarse_utcnow()
    trial['refresh_time'] = stamp
def _write_exception_back(trial, e):
    """Flag ``trial`` as failed and store the exception ``e`` on it.

    Args:
        trial: trial document (mapping) with a ``'misc'`` mapping.
        e: the exception raised for this trial; its type and message are
            stored as strings under ``trial['misc']['error']``.
    """
    trial['state'] = base.JOB_STATE_ERROR
    error_entry = (str(type(e)), str(e))
    trial['misc']['error'] = error_entry
    stamp = coarse_utcnow()
    trial['refresh_time'] = stamp
def _write_result_back(trial, result):
    """Flag ``trial`` as done and store ``result`` on it.

    Args:
        trial: trial document (mapping) updated in place.
        result: value stored under ``trial['result']``.
    """
    trial['state'] = base.JOB_STATE_DONE
    trial['result'] = result
    stamp = coarse_utcnow()
    trial['refresh_time'] = stamp