def _make_pruner(self):
    if isinstance(self.pruner_method, str):
        if self.pruner_method == 'halving':
            pruner = SuccessiveHalvingPruner(min_resource=self.n_timesteps // 6,
                                             reduction_factor=4,
                                             min_early_stopping_rate=0)
        elif self.pruner_method == 'median':
            pruner = MedianPruner(n_startup_trials=5,
                                  n_warmup_steps=self.n_timesteps // 6)
        elif self.pruner_method == 'none':
            # Do not prune
            pruner = NopPruner()
        else:
            raise ValueError('Unknown pruner: {}'.format(self.pruner_method))
    elif isinstance(self.pruner_method, dict):
        method_copy = deepcopy(self.pruner_method)
        method = method_copy.pop('method')
        if method == 'halving':
            pruner = SuccessiveHalvingPruner(**method_copy)
        elif method == 'median':
            pruner = MedianPruner(**method_copy)
        elif method == 'none':
            # Do not prune
            pruner = NopPruner()
        else:
            raise ValueError('Unknown pruner: {}'.format(self.pruner_method))
    else:
        raise ValueError("Wrong type for pruner settings!")
    return pruner
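# A minimal usage sketch (not from the source): since `_make_pruner` only reads
# `pruner_method` and `n_timesteps` from `self`, a SimpleNamespace can stand in
# for the owning object here, treating `_make_pruner` as a plain function.
from types import SimpleNamespace

# String form: preset pruners with thresholds derived from the timestep budget.
cfg = SimpleNamespace(pruner_method='median', n_timesteps=60000)
pruner = _make_pruner(cfg)

# Dict form: a 'method' key plus explicit constructor kwargs.
cfg = SimpleNamespace(pruner_method={'method': 'halving', 'reduction_factor': 3},
                      n_timesteps=60000)
pruner = _make_pruner(cfg)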
def _create_pruner(self, pruner_method: str) -> BasePruner:
    if pruner_method == "halving":
        pruner = SuccessiveHalvingPruner(min_resource=1,
                                         reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == "median":
        pruner = MedianPruner(n_startup_trials=self.n_startup_trials,
                              n_warmup_steps=self.n_evaluations // 3)
    elif pruner_method == "none":
        # Do not prune
        pruner = MedianPruner(n_startup_trials=self.n_trials,
                              n_warmup_steps=self.n_evaluations)
    else:
        raise ValueError(f"Unknown pruner: {pruner_method}")
    return pruner
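# Side note (sketch): the "none" branch above emulates "no pruning" with a
# MedianPruner whose startup-trial count covers the whole study, so it can
# never fire. Optuna also ships a direct equivalent:
from optuna.pruners import NopPruner

pruner = NopPruner()  # never prunes any trial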
def cv_hyperparam_study():
    params = {'class_n': CLASS_N, 'edl_used': EDL_USED}
    if EDL_USED != 0:
        params['edl_fun'] = 'mse'
        params['kl'] = EDL_USED - 1

    for test_trial in range(1, 7):
        params['outer_f'] = test_trial
        for sb_n in range(1, 11):
            params['sb_n'] = sb_n
            study_path = f'study/ecnn{EDL_USED}/sb{sb_n}'
            if not os.path.exists(study_path):
                os.makedirs(study_path)
            sampler = TPESampler()
            study = optuna.create_study(
                direction="minimize",  # maximize or minimize our objective
                sampler=sampler,  # parameter sampling strategy
                pruner=MedianPruner(
                    n_startup_trials=5,
                    n_warmup_steps=3,  # let's say num epochs
                    interval_steps=1,
                ),
                study_name='STUDY',
                storage="sqlite:///" + study_path + f"/t{test_trial}.db",  # storing study results
                load_if_exists=False  # an error will be raised if a study with the same name already exists
            )
            study.optimize(lambda trial: objective(trial, params), n_trials=25)

            print("Number of finished trials: ", len(study.trials))
            print("Best trial:")
            trial = study.best_trial
            print("  Value: ", trial.value)
            print("  Params: ")
            for key, value in trial.params.items():
                print("    {}: {}".format(key, value))
    return
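# Hypothetical shape of the `objective(trial, params)` used above (the real one
# is defined elsewhere; `train_one_epoch` is a made-up stand-in). The per-epoch
# `report`/`should_prune` calls are what give the MedianPruner configured above
# something to act on.
def objective(trial, params):
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    val_loss = float('inf')
    for epoch in range(20):
        val_loss = train_one_epoch(lr, params)  # hypothetical training step
        trial.report(val_loss, epoch)  # intermediate value for the pruner
        if trial.should_prune():
            raise optuna.TrialPruned()
    return val_loss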
    elif win == 2:
        draw_count += 1

    if n + 1 == draw_count:
        win_rate = 0.0
    else:
        win_rate = win_count / (n + 1 - draw_count)

    logging.info(
        'trial {} game {} result : win = {}, win count = {}, draw count = {}, win rate = {:.1f}%'
        .format(trial.trial_id, n, win, win_count, draw_count, win_rate * 100))

    # Terminate the USI engines
    for p in procs:
        p.stdin.write(b'quit\n')
        p.stdin.flush()
        p.wait()

    # Prune optimization steps that show no promise
    trial.report(-win_rate, n)
    if trial.should_prune(n):
        logging.info('trial {} game {} pruned'.format(trial.trial_id, n))
        raise optuna.structs.TrialPruned()

    # Return the win rate as a negative value (the study minimizes)
    return -win_rate


study = create_study(pruner=MedianPruner(n_warmup_steps=20))
study.optimize(objective, n_trials=args.trials)
def hyperparam_optimization(algo, model_fn, env_fn, n_trials=10,
                            n_timesteps=5000, hyperparams=None,
                            n_jobs=1, sampler_method='random',
                            pruner_method='halving',
                            seed=0, verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.Dataframe) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    n_startup_trials = 10
    # test during 5 episodes
    n_eval_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of step.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP",
                                             'acq_func': 'gp_hedge'})
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):
        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        # if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
        if algo in ['ddpg', 'td3'] or trial.model_class in [DDPG, TD3]:  # bug to report: changed by Pierre
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = eval_freq
        if isinstance(model.get_env(), VecEnv):
            eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env, trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        if algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(eval_env, VecEnv):
                print("UNVECTORIZE ENV")
                eval_env = _UnvecWrapper(eval_env)
            # eval_env = HERGoalEnvWrapper(eval_env)  # commented by Pierre

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)
    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    ######## added by pierre
    best_params = trial.params
    print("best params: ", best_params)
    # print("best value: ", study.best_value)
    # print("best best trial: ", study.best_trial)
    # with open('hyperparameter.yml', 'w') as outfile:
    #     yaml.dump(best_params, outfile)
    ########

    return study.trials_dataframe(), best_params
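# `TrialEvalCallback` is used above but not shown; a sketch of its usual shape
# (modeled on the rl-baselines-zoo pattern; the original may differ). It runs a
# periodic evaluation, reports the result to Optuna, and stops training when
# the trial should be pruned.
from stable_baselines.common.callbacks import EvalCallback

class TrialEvalCallback(EvalCallback):
    """Callback used for evaluating and reporting an Optuna trial."""

    def __init__(self, eval_env, trial, n_eval_episodes=5,
                 eval_freq=10000, deterministic=True, verbose=0):
        super().__init__(eval_env=eval_env, n_eval_episodes=n_eval_episodes,
                         eval_freq=eval_freq, deterministic=deterministic,
                         verbose=verbose)
        self.trial = trial
        self.eval_idx = 0
        self.is_pruned = False

    def _on_step(self):
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # Run the evaluation (sets self.last_mean_reward)
            super()._on_step()
            self.eval_idx += 1
            # Report negated reward because these studies minimize a cost
            self.trial.report(-1 * self.last_mean_reward, self.eval_idx)
            # Prune trial if needed
            if self.trial.should_prune(self.eval_idx):
                self.is_pruned = True
                return False  # stop training
        return True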
def hyperparam_optimization(
    algo,
    model_fn,
    env_fn,
    n_trials=10,
    n_timesteps=5000,
    hyperparams=None,  # noqa: C901
    n_jobs=1,
    sampler_method="tpe",
    pruner_method="median",
    n_startup_trials=10,
    n_evaluations=20,
    n_eval_episodes=5,
    storage=None,
    study_name=None,
    seed=0,
    verbose=1,
    deterministic_eval=True,
):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) Evaluate every 20th of the maximum budget per iteration
    :param n_eval_episodes: (int) Evaluate the model during 5 episodes
    :param storage: (Optional[str])
    :param study_name: (Optional[str])
    :param seed: (int)
    :param verbose: (int)
    :param deterministic_eval: (bool)
    :return: (pd.Dataframe) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of step.
    if sampler_method == "random":
        sampler = RandomSampler(seed=seed)
    elif sampler_method == "tpe":
        # TODO: try with multivariate=True
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == "skopt":
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={"base_estimator": "GP",
                                             "acq_func": "gp_hedge"})
    else:
        raise ValueError(f"Unknown sampler: {sampler_method}")

    if pruner_method == "halving":
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == "median":
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == "none":
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f"Unknown pruner: {pruner_method}")

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(sampler=sampler, pruner=pruner,
                                storage=storage, study_name=study_name,
                                load_if_exists=True, direction="maximize")
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):
        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == "her":
            trial.model_class = hyperparams["model_class"]

        # Hack to use DDPG/TD3 noise sampler
        if algo in ["ddpg", "td3"] or trial.model_class in ["ddpg", "td3"]:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: Use non-deterministic eval for Atari
        # or use maximum number of steps to avoid infinite loop
        eval_callback = TrialEvalCallback(eval_env, trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=deterministic_eval)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        reward = eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return reward

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print("Number of finished trials: ", len(study.trials))
    print("Best trial:")
    trial = study.best_trial

    print("Value: ", trial.value)
    print("Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    return study.trials_dataframe()
def hyperparam_optimization(algo, model_fn, env_fn, n_trials=10,
                            n_timesteps=5000, hyperparams=None,
                            n_jobs=1, sampler_method='random',
                            pruner_method='halving',
                            seed=0, verbose=1, timeout=None):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.Dataframe) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    # test during 5 episodes
    n_test_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    evaluate_interval = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=5, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP",
                                             'acq_func': 'gp_hedge'})
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=5,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):
        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.

            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps - self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_episodes, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_episodes < n_test_episodes:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = self_.test_env.reset()

            mean_reward = np.mean(rewards)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # report best or report current ?
            # report num_timesteps or elapsed time ?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune trial if needed
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        model = model_fn(**kwargs)
        model.test_env = env_fn(n_envs=1)
        model.trial = trial
        if algo == 'her':
            model.model.trial = trial
            # Wrap the env if need to flatten the dict obs
            if isinstance(model.test_env, VecEnv):
                model.test_env = _UnvecWrapper(model.test_env)
            model.model.test_env = HERGoalEnvWrapper(model.test_env)

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        del model.env, model.test_env
        del model

        if is_pruned:
            raise optuna.structs.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs,
                       timeout=timeout, catch=(ValueError, AssertionError))
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)
    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
def objective(trial, ctf):
    learning_rate = trial.suggest_uniform('learning_rate', 0.0, 0.1)
    gamma = trial.suggest_uniform('gamma', 0.0, 0.1)
    dimensions = trial.suggest_int('dimensions', 750, 1000)
    x, y = calculate_fisher_discriminant(training_images_flat, training_labels, ctf)
    local_svm = SVMTree(x.shape[1], list(range(10)), learning_rate,
                        dimensions=dimensions, gamma=gamma)
    return local_svm.train(x, y, 14)


data = []
for ctf in frange(0.0, 1.0, 0.2):
    study = optuna.create_study(pruner=MedianPruner())
    study.optimize(lambda trial: objective(trial, ctf), n_trials=25)
    best = study.best_params
    x, y = calculate_fisher_discriminant(training_images_flat, training_labels, ctf)
    svm = SVMTree(x.shape[1], list(range(10)), best['learning_rate'],
                  dimensions=best['dimensions'], gamma=best['gamma'])
    t1 = time.time()
    svm.train(x, y, 14)
    t2 = time.time()
    adv_ex, bro_ex, grad_ex, peraccuracy = stage(test_images, test_labels,
                                                 'SVM_tree.pickle', 0.08,
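# `frange` is used above but not shown; a minimal float-range stand-in
# (an assumption, the original helper may differ):
def frange(start, stop, step):
    """Yield floats from `start` (inclusive) to `stop` (exclusive)."""
    x = start
    while x < stop:
        yield x
        x += step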
def hyperparam_optimization(n_trials=60, n_timesteps=int(1e5), n_jobs=1):
    """
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param n_jobs: (int) number of parallel jobs
    :return: (pd.Dataframe) detailed result of the optimization
    """
    n_startup_trials = 5
    sampler = TPESampler(n_startup_trials=n_startup_trials, seed=3)
    pruner = MedianPruner(n_startup_trials=n_startup_trials, n_warmup_steps=15)
    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = sample_sac_params

    def objective(trial):
        kwargs = {}
        trial.model_class = None
        kwargs.update(algo_sampler(trial))

        eval_env = ft.envs.CitationNormal()
        env_train = ft.envs.CitationNormal()
        model = create_model(env_train, **kwargs)
        eval_callback = ft.agent.SaveOnBestReturn(
            eval_env=eval_env, eval_freq=2000,
            log_path='optimization_logs/tmp/',
            best_model_save_path='optimization_logs/tmp/', verbose=0)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
            env_train.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            env_train.close()
            raise optuna.exceptions.TrialPruned()
        except IndexError:
            model.env.close()
            eval_env.close()
            env_train.close()
            raise optuna.exceptions.TrialPruned()

        cost = -1 * eval_callback.best_reward
        del model.env, eval_env
        del model

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial
    print('Value: ', trial.value)
    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
    return evaluate_accuracy(model, valid_loader)


if __name__ == "__main__":
    """
    Create a study and save study results to sqlite (for other storages see documentation).
    Later we can load our study to examine the results (an example in a jupyter notebook):
        study = optuna.study.load_study(study_name=STUDY_NAME, storage='sqlite:///example.db')
    """
    sampler = TPESampler(seed=10)
    study = optuna.create_study(
        direction="maximize",  # maximize or minimize our objective
        sampler=sampler,  # parameter sampling strategy
        pruner=MedianPruner(
            n_startup_trials=15,
            n_warmup_steps=5,  # let's say num epochs
            interval_steps=2,
        ),
        study_name=STUDY_NAME,
        storage="sqlite:///example.db",  # storing study results, other storages are available too, see documentation
        load_if_exists=True,
    )
    study.optimize(objective, n_trials=50)

    print("Number of finished trials: ", len(study.trials))
    print("Best trial:")
    trial = study.best_trial
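    # Re-loading the stored study later, as the docstring above describes
    # (sketch; assumes the same STUDY_NAME and example.db file):
    loaded_study = optuna.load_study(study_name=STUDY_NAME,
                                     storage="sqlite:///example.db")
    print(loaded_study.best_trial.params)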
def hyperparam_optimization(n_trials=20, n_timesteps=100000, hyperparams=None,
                            n_jobs=1, sampler_method='random',
                            pruner_method='halving', seed=1, verbose=1):
    """
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.Dataframe) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    # test during 1500 steps
    n_test_steps = 1500
    # evaluate every 40th of the maximum budget per iteration
    n_evaluations = 40
    evaluate_interval = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    # sampler = RandomSampler(seed=seed)
    # sampler = TPESampler(n_startup_trials=5, seed=seed)
    sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP",
                                         'acq_func': 'gp_hedge'})
    # pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=n_evaluations // 3)

    study = optuna.create_study(study_name="optimisation_PPO2",
                                sampler=sampler, pruner=pruner,
                                storage='sqlite:///optimizationSAC.db',
                                load_if_exists=True)

    def objective(trial):
        kwargs = hyperparams.copy()
        trial.model_class = None
        kwargs.update(sample_td3_params(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.

            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps - self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_steps_done, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                self_.test_env.ret_rms = deepcopy(self_.env.ret_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_steps_done < n_test_steps:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward
                n_steps_done += 1
                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    obs = self_.test_env.reset()
                    n_steps_done = n_test_steps
            rewards.append(reward_sum)
            mean_reward = np.mean(rewards)

            summary = tf.Summary(value=[
                tf.Summary.Value(tag='evaluation', simple_value=mean_reward)
            ])
            _locals['writer'].add_summary(summary, self_.num_timesteps)

            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # report best or report current ?
            # report num_timesteps or elapsed time ?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune trial if needed
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        commands = [[1, 0], [2, 0], [3, 0]]
        env = DummyVecEnv([
            lambda: e.AidaBulletEnv(commands,
                                    render=True,
                                    on_rack=False,
                                    default_reward=2,
                                    height_weight=5,
                                    orientation_weight=3,
                                    direction_weight=2,
                                    speed_weight=4)
        ])
        model = TD3(MlpPolicy, env,
                    gamma=kwargs['gamma'],
                    learning_rate=kwargs['learning_rate'],
                    batch_size=kwargs['batch_size'],
                    buffer_size=kwargs['buffer_size'],
                    train_freq=kwargs['train_freq'],
                    gradient_steps=kwargs['gradient_steps'],
                    action_noise=kwargs['action_noise'],
                    tensorboard_log="./optimisationSAC/logOPTI")
        model.test_env = DummyVecEnv([
            lambda: e.AidaBulletEnv(commands,
                                    render=False,
                                    on_rack=False,
                                    default_reward=2,
                                    height_weight=5,
                                    orientation_weight=3,
                                    direction_weight=2,
                                    speed_weight=2)
        ])
        model.trial = trial

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        try:
            os.mkdir("./optimisationSAC/resultats/" + str(trial.number))
        except FileExistsError:
            print("Directory already exists")
        model.save("./optimisationSAC/resultats/" + str(trial.number) + "/" + str(trial.number))
        del model.env, model.test_env
        del model

        if is_pruned:
            try:
                # Optuna >= 0.19.0
                raise optuna.exceptions.TrialPruned()
            except AttributeError:
                raise optuna.structs.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial
    print('Value: ', trial.value)
    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
def hyperparam_optimization(algo, model_fn, env_fn, n_trials=10,
                            n_timesteps=5000, hyperparams=None,
                            n_jobs=1, sampler_method='random',
                            pruner_method='halving',
                            n_startup_trials=10, n_evaluations=20,
                            n_eval_episodes=1,
                            seed=0, verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) Evaluate every 20th of the maximum budget per iteration
    :param n_eval_episodes: (int) Evaluate the model during 5 episodes
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.Dataframe) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of step.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP",
                                             'acq_func': 'gp_hedge'})
    else:
        raise ValueError(f'Unknown sampler: {sampler_method}')

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials,
                              n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials,
                              n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f'Unknown pruner: {pruner_method}')

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):
        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env, trial,
                                          n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_,
                                          deterministic=True)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)
    print('Params: ')
    for key, value in trial.params.items():
        print(f'    {key}: {value}')

    return study.trials_dataframe()
    N_iterations_max = 10_000
    early_stopping_rounds = 50
    if boosting_type == "dart":
        N_iterations_max = 100
        early_stopping_rounds = None

    cv_res = lgb.cv(
        params,
        lgb_data_train,
        num_boost_round=N_iterations_max,
        early_stopping_rounds=early_stopping_rounds,
        verbose_eval=False,
        seed=42,
        callbacks=[LightGBMPruningCallback(trial, "auc")],
    )

    num_boost_round = len(cv_res["auc-mean"])
    trial.set_user_attr("num_boost_round", num_boost_round)

    return cv_res["auc-mean"][-1]


#%%
study = optuna.create_study(
    direction="maximize",
    sampler=TPESampler(seed=42),
    pruner=MedianPruner(n_warmup_steps=50),
)

study.optimize(objective, n_trials=100, show_progress_bar=True)
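# Sketch of a possible final fit (not part of the snippet above): the boosting
# round count saved with `set_user_attr` can be read back from the best trial,
# assuming the same base `params` dict and `lgb_data_train` dataset.
best = study.best_trial
final_model = lgb.train(
    {**params, **best.params},  # base params overridden by the tuned values
    lgb_data_train,
    num_boost_round=best.user_attrs["num_boost_round"],
)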
    logging.info(
        'trial {} game {} result : win = {}, win count = {}, draw count = {}, win rate = {:.1f}%'
        .format(trial.number, n, win, win_count, draw_count, win_rate * 100))

    # Terminate the USI engines
    for p in procs:
        p.stdin.write(b'quit\n')
        p.stdin.flush()
        p.wait()

    # Prune optimization steps that show no promise
    trial.report(-win_rate, n)
    if trial.should_prune(n):
        logging.info('trial {} game {} pruned'.format(trial.number, n))
        raise TrialPruned()

    # Return the win rate as a negative value (the study minimizes)
    return -win_rate


if args.storage:
    study = load_study(study_name='mcts_params_optimizer',
                       storage=args.storage,
                       pruner=MedianPruner(n_warmup_steps=args.n_warmup_steps))
else:
    study = create_study(pruner=MedianPruner(n_warmup_steps=args.n_warmup_steps))

study.optimize(objective, n_trials=args.trials)
    if nan_encountered:
        return float("nan")

    if eval_callback.is_pruned:
        raise optuna.exceptions.TrialPruned()

    return eval_callback.last_mean_reward


if __name__ == "__main__":
    # Set pytorch num threads to 1 for faster training
    torch.set_num_threads(1)

    sampler = TPESampler(n_startup_trials=N_STARTUP_TRIALS)
    # Do not prune before 1/3 of the max budget is used
    pruner = MedianPruner(n_startup_trials=N_STARTUP_TRIALS,
                          n_warmup_steps=N_EVALUATIONS // 3)

    study = optuna.create_study(sampler=sampler, pruner=pruner, direction="maximize")
    try:
        study.optimize(objective, n_trials=N_TRIALS, n_jobs=N_JOBS, timeout=600)
    except KeyboardInterrupt:
        pass

    print("Number of finished trials: ", len(study.trials))
    print("Best trial:")
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=20, min_rollouts=min_rollouts)
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts

    return mean_ret


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    ex_dir = setup_experiment('hyperparams', QBallBalancerSim.name,
                              'ppo_250Hz_actnorm', seed=args.seed)

    # Run hyper-parameter optimization
    name = f'{ex_dir.algo_name}_{ex_dir.add_info}'  # e.g. qbb_ppo_fnn_actnorm
    study = optuna.create_study(
        study_name=name,
        storage=f"sqlite:////{osp.join(pyrado.TEMP_DIR, ex_dir, f'{name}.db')}",
        direction='maximize',
        pruner=MedianPruner(),
        load_if_exists=True
    )
    study.optimize(functools.partial(train_and_eval, ex_dir=ex_dir, seed=args.seed),
                   n_trials=100, n_jobs=6)

    # Save the best hyper-parameters
    save_list_of_dicts_to_yaml([study.best_params, dict(seed=args.seed)],
                               ex_dir, 'best_hyperparams')
def optimize(env_id, params, args, session_path, session_id):
    n_trials = args.n_trials
    n_episodes_per_eval = args.n_episodes_per_eval

    seed = int(time())

    if args.sampler == 'random':
        sampler = RandomSampler(seed=seed)
    elif args.sampler == 'tpe':
        sampler = TPESampler(n_startup_trials=5, seed=seed)
    elif args.sampler == 'skopt':
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP",
                                             'acq_func': 'gp_hedge'})
    else:
        raise ValueError('Unknown sampler: {}'.format(args.sampler))

    if args.pruner == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4,
                                         min_early_stopping_rate=0)
    elif args.pruner == 'median':
        pruner = MedianPruner(n_startup_trials=5)
    elif args.pruner == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials)
    else:
        raise ValueError('Unknown pruner: {}'.format(args.pruner))

    study_name = args.optimizer_study_name if args.optimizer_study_name else f'{session_id}-optimizer_study'
    storage = f'sqlite:///{study_name}.db' if args.optimizer_use_db else None
    study = optuna.create_study(study_name=study_name, storage=storage,
                                load_if_exists=True, sampler=sampler, pruner=pruner)

    # the objective function called by optuna during each trial
    def objective(trial):
        # copy to preserve original params
        _params = params.copy()
        _params['hyper_params'] = HYPERPARAMS_SAMPLER[args.algorithm.lower()](trial)

        # network architecture
        net_arch = trial.suggest_categorical('net_arch', ['8x8', '16x16', '32x32'])
        layers = map(int, net_arch.split('x'))
        policy_kwargs = dict(act_fun=tf.nn.relu, net_arch=list(layers))

        print(f'*** beginning trial {trial.number}')
        print('\thyper-parameters:')
        for param, value in _params['hyper_params'].items():
            print(f'\t\t{param}:{value}')
        print(f'\t\tnet_arch: {net_arch}')

        _params['save_dir'] = _params['save_dir'] / 'optimizer'

        try:
            # purge any previously saved models
            purge_model(_params, args, interactive=False)

            ######################################################
            # learning phase - on possibly multiple environments #
            ######################################################
            godot_instances = [GodotInstance(o_port, a_port)
                               for o_port, a_port in get_godot_instances(args.n_godot_instances)]
            env = create_env(args, env_id, godot_instances, _params, session_path)
            env = VecCheckNan(env, warn_once=False, raise_exception=True)

            # learn and save model
            model = init_model(session_path, _params, env, args, policy_kwargs=policy_kwargs)
            learn(env, model, _params, args, session_path)
            env.close()

            ##########################################################################
            # evaluation phase - single environment (deterministic action selection) #
            ##########################################################################
            env = create_env(args, env_id, [GODOT_EVAL_INSTANCE], _params, session_path, eval=True)
            env = VecCheckNan(env, warn_once=False, raise_exception=True)

            # load the previously learned model and evaluate it
            model = init_model(session_path, _params, env, args, eval=True)
            mean_reward, _ = evaluate(model, env, args, n_episodes=n_episodes_per_eval)
            env.close()
        except (AssertionError, ValueError) as e:
            print(f'pruning optimizer trial {trial.number} due to exception {e}')
            raise optuna.exceptions.TrialPruned()

        # optuna minimizes the objective by default, so we need to flip the sign to maximize
        cost = -1 * mean_reward
        return cost

    try:
        study.optimize(objective, n_trials)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial
    print('Value: ', trial.value)
    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()