def _make_pruner(self):
    if isinstance(self.pruner_method, str):
        if self.pruner_method == 'halving':
            pruner = SuccessiveHalvingPruner(min_resource=self.n_timesteps // 6,
                                             reduction_factor=4, min_early_stopping_rate=0)
        elif self.pruner_method == 'median':
            pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=self.n_timesteps // 6)
        elif self.pruner_method == 'none':
            # Do not prune
            pruner = NopPruner()
        else:
            raise ValueError('Unknown pruner: {}'.format(self.pruner_method))
    elif isinstance(self.pruner_method, dict):
        method_copy = deepcopy(self.pruner_method)
        method = method_copy.pop('method')
        if method == 'halving':
            pruner = SuccessiveHalvingPruner(**method_copy)
        elif method == 'median':
            pruner = MedianPruner(**method_copy)
        elif method == 'none':
            # Do not prune
            pruner = NopPruner()
        else:
            raise ValueError('Unknown pruner: {}'.format(self.pruner_method))
    else:
        raise ValueError("Wrong type for pruner settings!")
    return pruner
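# Standalone sketch (not from the original class) of the dict-based configuration that
# _make_pruner above accepts: the 'method' key selects the pruner and the remaining keys
# are forwarded as keyword arguments to its constructor.
from copy import deepcopy
from optuna.pruners import SuccessiveHalvingPruner

pruner_settings = {'method': 'halving', 'min_resource': 2, 'reduction_factor': 3}
kwargs = deepcopy(pruner_settings)
method = kwargs.pop('method')
assert method == 'halving'
pruner = SuccessiveHalvingPruner(**kwargs)  # the same call the factory makes for 'halving'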
def main():
    import optuna
    from optuna.pruners import SuccessiveHalvingPruner

    study = optuna.create_study(pruner=SuccessiveHalvingPruner())
    study.optimize(objective, n_trials=100)

    # optuna.structs is deprecated in recent Optuna releases; optuna.trial.TrialState
    # is the current location of the same enum.
    pruned_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.PRUNED]
    complete_trials = [t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]

    print('Study statistics: ')
    print('  Number of finished trials: ', len(study.trials))
    print('  Number of pruned trials: ', len(pruned_trials))
    print('  Number of complete trials: ', len(complete_trials))

    print('Best trial:')
    trial = study.best_trial

    print('  Value: ', trial.value)

    print('  Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    print('  User attrs:')
    for key, value in trial.user_attrs.items():
        print('    {}: {}'.format(key, value))
def _create_pruner(self, pruner_method: str) -> BasePruner:
    if pruner_method == "halving":
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    elif pruner_method == "median":
        pruner = MedianPruner(n_startup_trials=self.n_startup_trials,
                              n_warmup_steps=self.n_evaluations // 3)
    elif pruner_method == "none":
        # Do not prune: a MedianPruner that never leaves its startup/warmup phase never prunes
        pruner = MedianPruner(n_startup_trials=self.n_trials, n_warmup_steps=self.n_evaluations)
    else:
        raise ValueError(f"Unknown pruner: {pruner_method}")
    return pruner
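# Minimal, self-contained sketch (not part of the snippets above) of the report/prune
# protocol these pruners rely on: the objective reports an intermediate value at each step,
# asks the pruner whether to stop, and raises TrialPruned if so.
import optuna
from optuna.pruners import SuccessiveHalvingPruner

def objective(trial):
    x = trial.suggest_float("x", -10, 10)
    for step in range(20):
        intermediate = (x - 2) ** 2 + 1.0 / (step + 1)  # toy learning curve
        trial.report(intermediate, step)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return (x - 2) ** 2

study = optuna.create_study(direction="minimize", pruner=SuccessiveHalvingPruner())
study.optimize(objective, n_trials=30)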
def search_neural_arch(non_arch_config, checkpoint_dir=None):
    optuna.logging.set_verbosity(optuna.logging.FATAL)
    study = optuna.create_study(
        directions=["minimize", "maximize"],
        study_name=str(non_arch_config),
        sampler=BoTorchSampler(),
        pruner=SuccessiveHalvingPruner(),
        # storage='sqlite:///na.db',
        storage="mysql://root@localhost/example",
        load_if_exists=True)
    study.optimize(
        partial(train_cifar, non_arch_config),
        n_trials=oom,
        # n_jobs=4,
        gc_after_trial=True,
        callbacks=[nas_report])
def __init__(self, estimators, pipeline=None, pipe_params=None, scoring=None, cv=3,
             agg_func=np.mean, refit=True, tol=1e-5, max_iter=50, time_limit=None,
             max_fails=3, study_name=None, save_cv_preds=False,
             pruner=SuccessiveHalvingPruner(min_resource=3, reduction_factor=3),
             sampler=TPESampler(**TPESampler.hyperopt_parameters()),
             storage=None, n_jobs=1, verbose=1, random_state=None):
    self.estimators = estimators
    self.pipeline = pipeline
    self.pipe_params = pipe_params
    self.scoring = scoring
    self.cv = cv
    self.agg_func = agg_func
    self.refit_ = refit
    self.tol = tol
    self.max_iter = max_iter
    self.time_limit = time_limit
    self.max_fails = max_fails
    self.study_name = study_name
    self.save_cv_preds = save_cv_preds
    self.pruner = pruner
    self.sampler = sampler
    self.storage = storage
    self.n_jobs = n_jobs
    self.verbose = verbose
    self.random_state = random_state
    self.best_estimator_ = None
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--trials', type=int, default=5)
    args = parser.parse_args()

    study = optuna.create_study(direction='maximize', pruner=SuccessiveHalvingPruner())
    study.optimize(objective_wrapper(args.epochs), n_trials=args.trials)

    print('Number of finished trials: ', len(study.trials))
    print('Best trial:')
    trial = study.best_trial
    print('  Value: ', trial.value)
    print('  Params: ')
    for key, value in trial.params.items():
        print(f'  {key}: {value}')
    study.trials_dataframe().to_csv(DIR / 'result.csv')
def hyperparam_optimization(algo, model_fn, env_fn, n_trials=10, n_timesteps=5000,
                            hyperparams=None, n_jobs=1, sampler_method='random',
                            pruner_method='halving', seed=0, verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.Dataframe) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    n_startup_trials = 10
    # test during 5 episodes
    n_eval_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP", 'acq_func': 'gp_hedge'})
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials, n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials, n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):
        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        # if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
        if algo in ['ddpg', 'td3'] or trial.model_class in [DDPG, TD3]:  # bug to report: changed by Pierre
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = eval_freq
        if isinstance(model.get_env(), VecEnv):
            eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env, trial, n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_, deterministic=True)

        if algo == 'her':
            # Wrap the env if needed to flatten the dict obs
            if isinstance(eval_env, VecEnv):
                print("UNVECTORIZE ENV")
                eval_env = _UnvecWrapper(eval_env)
            # eval_env = HERGoalEnvWrapper(eval_env)  # commented by Pierre

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    # added by Pierre
    best_params = trial.params
    print("best params: ", best_params)
    # print("best value: ", study.best_value)
    # print("best best trial: ", study.best_trial)
    # with open('hyperparameter.yml', 'w') as outfile:
    #     yaml.dump(best_params, outfile)

    return study.trials_dataframe(), best_params
def main(args):
    workdir = os.path.expanduser(args.tuning_directory)

    if os.path.exists(workdir) and not args.force:
        print("* error: %s exists." % workdir)
        exit(1)

    os.makedirs(workdir, exist_ok=True)

    init(args.seed, args.device)
    device = torch.device(args.device)

    print("[loading data]")
    chunks, chunk_lengths, targets, target_lengths = load_data(limit=args.chunks,
                                                               directory=args.directory)

    split = np.floor(chunks.shape[0] * args.validation_split).astype(np.int32)
    train_dataset = ChunkDataSet(chunks[:split], chunk_lengths[:split],
                                 targets[:split], target_lengths[:split])
    test_dataset = ChunkDataSet(chunks[split:], chunk_lengths[split:],
                                targets[split:], target_lengths[split:])
    train_loader = DataLoader(train_dataset, batch_size=args.batch, shuffle=True,
                              num_workers=4, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch, num_workers=4, pin_memory=True)

    def objective(trial):

        config = toml.load(args.config)

        lr = 1e-3
        # config['block'][0]['stride'] = [trial.suggest_int('stride', 4, 6)]

        # C1
        config['block'][0]['kernel'] = [int(trial.suggest_discrete_uniform('c1_kernel', 1, 129, 2))]
        config['block'][0]['filters'] = trial.suggest_int('c1_filters', 1, 1024)

        # B1 - B5
        for i in range(1, 6):
            config['block'][i]['repeat'] = trial.suggest_int('b%s_repeat' % i, 1, 9)
            config['block'][i]['filters'] = trial.suggest_int('b%s_filters' % i, 1, 512)
            config['block'][i]['kernel'] = [int(trial.suggest_discrete_uniform('b%s_kernel' % i, 1, 129, 2))]

        # C2
        config['block'][-2]['kernel'] = [int(trial.suggest_discrete_uniform('c2_kernel', 1, 129, 2))]
        config['block'][-2]['filters'] = trial.suggest_int('c2_filters', 1, 1024)

        # C3
        config['block'][-1]['kernel'] = [int(trial.suggest_discrete_uniform('c3_kernel', 1, 129, 2))]
        config['block'][-1]['filters'] = trial.suggest_int('c3_filters', 1, 1024)

        model = load_symbol(config, 'Model')(config)
        num_params = sum(p.numel() for p in model.parameters())

        print("[trial %s]" % trial.number)

        if num_params > args.max_params:
            print("[pruned] network too large")
            raise optuna.exceptions.TrialPruned()

        model.to(args.device)
        model.train()

        os.makedirs(workdir, exist_ok=True)

        optimizer = AdamW(model.parameters(), amsgrad=True, lr=lr)
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
        schedular = CosineAnnealingLR(optimizer, args.epochs * len(train_loader))

        for epoch in range(1, args.epochs + 1):

            try:
                train_loss, duration = train(model, device, train_loader, optimizer, use_amp=True)
                val_loss, val_mean, val_median = test(model, device, test_loader)
                print("[epoch {}] directory={} loss={:.4f} mean_acc={:.3f}% median_acc={:.3f}%".format(
                    epoch, workdir, val_loss, val_mean, val_median))
            except KeyboardInterrupt:
                exit()
            except Exception:
                print("[pruned] exception")
                raise optuna.exceptions.TrialPruned()

            if np.isnan(val_loss):
                val_loss = 9.9

            trial.report(val_loss, epoch)

            if trial.should_prune():
                print("[pruned] unpromising")
                raise optuna.exceptions.TrialPruned()

        trial.set_user_attr('seed', args.seed)
        trial.set_user_attr('val_loss', val_loss)
        trial.set_user_attr('val_mean', val_mean)
        trial.set_user_attr('val_median', val_median)
        trial.set_user_attr('train_loss', train_loss)
        trial.set_user_attr('batchsize', args.batch)
        trial.set_user_attr('model_params', num_params)

        torch.save(model.state_dict(), os.path.join(workdir, "weights_%s.tar" % trial.number))
        toml.dump(config, open(os.path.join(workdir, 'config_%s.toml' % trial.number), 'w'))

        print("[loss] %.4f" % val_loss)
        return val_loss

    print("[starting study]")

    optuna.logging.set_verbosity(optuna.logging.WARNING)

    study = optuna.create_study(
        direction='minimize',
        storage='sqlite:///%s' % os.path.join(workdir, 'tune.db'),
        study_name='bonito-study',
        load_if_exists=True,
        pruner=SuccessiveHalvingPruner())

    study.optimize(objective, n_trials=args.trials)
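# Aside (not from the tuning script above): because the study is persisted to SQLite with
# load_if_exists=True, it can be reopened later, e.g. to inspect results or resume the
# search from another process. A minimal sketch, with the tuning directory path assumed:
import os
import optuna

workdir = os.path.expanduser('~/bonito-tuning')  # hypothetical tuning_directory
study = optuna.load_study(study_name='bonito-study',
                          storage='sqlite:///%s' % os.path.join(workdir, 'tune.db'))
print('trials so far:', len(study.trials))
print('best loss:', study.best_value)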
def hyperparam_optimization(
    algo,
    model_fn,
    env_fn,
    n_trials=10,
    n_timesteps=5000,
    hyperparams=None,  # noqa: C901
    n_jobs=1,
    sampler_method="tpe",
    pruner_method="median",
    n_startup_trials=10,
    n_evaluations=20,
    n_eval_episodes=5,
    storage=None,
    study_name=None,
    seed=0,
    verbose=1,
    deterministic_eval=True,
):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) Evaluate every 20th of the maximum budget per iteration
    :param n_eval_episodes: (int) Evaluate the model during 5 episodes
    :param storage: (Optional[str])
    :param study_name: (Optional[str])
    :param seed: (int)
    :param verbose: (int)
    :param deterministic_eval: (bool)
    :return: (pd.Dataframe) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of step.
    if sampler_method == "random":
        sampler = RandomSampler(seed=seed)
    elif sampler_method == "tpe":
        # TODO: try with multivariate=True
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == "skopt":
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={"base_estimator": "GP", "acq_func": "gp_hedge"})
    else:
        raise ValueError(f"Unknown sampler: {sampler_method}")

    if pruner_method == "halving":
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    elif pruner_method == "median":
        pruner = MedianPruner(n_startup_trials=n_startup_trials, n_warmup_steps=n_evaluations // 3)
    elif pruner_method == "none":
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials, n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f"Unknown pruner: {pruner_method}")

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(
        sampler=sampler,
        pruner=pruner,
        storage=storage,
        study_name=study_name,
        load_if_exists=True,
        direction="maximize",
    )
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):
        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == "her":
            trial.model_class = hyperparams["model_class"]

        # Hack to use DDPG/TD3 noise sampler
        if algo in ["ddpg", "td3"] or trial.model_class in ["ddpg", "td3"]:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: Use non-deterministic eval for Atari
        # or use maximum number of steps to avoid infinite loop
        eval_callback = TrialEvalCallback(eval_env, trial, n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_, deterministic=deterministic_eval)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        reward = eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return reward

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print("Number of finished trials: ", len(study.trials))

    print("Best trial:")
    trial = study.best_trial

    print("Value: ", trial.value)

    print("Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    return study.trials_dataframe()
def hyperparam_optimization(algo, model_fn, env_fn, n_trials=10, n_timesteps=5000,
                            hyperparams=None, n_jobs=1, sampler_method='random',
                            pruner_method='halving', seed=0, verbose=1, timeout=None):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param seed: (int)
    :param verbose: (int)
    :param timeout: (Optional[int]) maximum wall-clock time for the optimization, in seconds
    :return: (pd.Dataframe) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    # TODO: take into account the normalization (also for the test env -> sync obs_rms)
    if hyperparams is None:
        hyperparams = {}

    # test during 5 episodes
    n_test_episodes = 5
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    evaluate_interval = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of steps.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=5, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP", 'acq_func': 'gp_hedge'})
    else:
        raise ValueError('Unknown sampler: {}'.format(sampler_method))

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials, n_warmup_steps=n_evaluations)
    else:
        raise ValueError('Unknown pruner: {}'.format(pruner_method))

    if verbose > 0:
        print("Sampler: {} - Pruner: {}".format(sampler_method, pruner_method))

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):
        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.

            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

            if (self_.num_timesteps - self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_episodes, reward_sum = 0, 0.0

            # Sync the obs rms if using vecnormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_episodes < n_test_episodes:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    n_episodes += 1
                    obs = self_.test_env.reset()

            mean_reward = np.mean(rewards)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # report best or report current ?
            # report num_timesteps or elapsed time ?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune trial if needed
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        model = model_fn(**kwargs)
        model.test_env = env_fn(n_envs=1)
        model.trial = trial
        if algo == 'her':
            model.model.trial = trial
            # Wrap the env if needed to flatten the dict obs
            if isinstance(model.test_env, VecEnv):
                model.test_env = _UnvecWrapper(model.test_env)
            model.model.test_env = HERGoalEnvWrapper(model.test_env)

        try:
            model.learn(n_timesteps, callback=callback)
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        del model.env, model.test_env
        del model

        if is_pruned:
            raise optuna.structs.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs, timeout=timeout,
                       catch=(ValueError, AssertionError))
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()
def run(n_timesteps=train.shape[0], seed=42, n_trials=100):
    n_startup_trials = 10
    # evaluate every 20th of the maximum budget per iteration
    n_evaluations = 20
    eval_freq = int(n_timesteps / n_evaluations)

    sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    study = optuna.create_study(sampler=sampler, pruner=pruner, study_name=study_name,
                                storage=storage_name, load_if_exists=True)

    def param_sampler(trial: Trial):
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])
        n_steps = trial.suggest_categorical('n_steps', [16, 32, 64, 128, 256, 512, 1024, 2048])
        gamma = trial.suggest_categorical('gamma', [0.001, 0.01, 0.1, 0.2, 0.3, 0.5])
        learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
        ent_coef = trial.suggest_loguniform('ent_coef', 0.00000001, 0.1)
        cliprange = trial.suggest_categorical('cliprange', [0.1, 0.2, 0.3, 0.4])
        noptepochs = trial.suggest_categorical('noptepochs', [1, 5, 10, 20, 30, 50])
        lam = trial.suggest_categorical('lambda', [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0])

        if n_steps < batch_size:
            nminibatches = 1
        else:
            nminibatches = int(n_steps / batch_size)

        multiplicator_coef = trial.suggest_uniform("multiplicator_coef", 0.8, 1.2)
        reward_multiplicator = 100
        negative_reward_multiplicator = reward_multiplicator * multiplicator_coef

        features = [c for c in train.columns.values if "f_" in c] + ["feature_0", "weight"]

        train_py_env = MarketEnvDaily(
            trades=train,
            features=features,
            reward_column="resp",
            weight_column="weight",
            include_weight=True,
            reward_multiplicator=reward_multiplicator,
            negative_reward_multiplicator=negative_reward_multiplicator)
        train_env = DummyVecEnv([lambda: train_py_env])

        num_layers = trial.suggest_categorical("num_layers", [1, 2, 3])
        net_arch = []
        for i in range(num_layers):
            l = trial.suggest_categorical("layer_{i}".format(i=i), [1, 2, 3])
            net_arch.append(l * len(features))

        policy_kwargs = dict(act_fun=tf.nn.swish, net_arch=net_arch)

        return {
            'policy': MlpPolicy,
            'env': train_env,
            'n_steps': n_steps,
            'nminibatches': nminibatches,
            'gamma': gamma,
            'learning_rate': learning_rate,
            'ent_coef': ent_coef,
            'cliprange': cliprange,
            'noptepochs': noptepochs,
            'policy_kwargs': policy_kwargs,
            'lam': lam
        }

    def objective(trial):
        kwargs = param_sampler(trial)
        model = PPO2(**kwargs)

        eval_callback = TrialEvalCallback(train, eval_df, trial, eval_freq=eval_freq)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            raise optuna.exceptions.TrialPruned()

        is_pruned = eval_callback.is_pruned
        sum_of_t_coef = -1 * eval_callback.sum_of_t_coef

        del model.env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return float(sum_of_t_coef)

    try:
        study.optimize(objective, n_trials=n_trials)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))
def hyperparam_optimization(algo, model_fn, env_fn, n_trials=10, n_timesteps=5000,
                            hyperparams=None, n_jobs=1, sampler_method='random',
                            pruner_method='halving', n_startup_trials=10, n_evaluations=20,
                            n_eval_episodes=1, seed=0, verbose=1):
    """
    :param algo: (str)
    :param model_fn: (func) function that is used to instantiate the model
    :param env_fn: (func) function that is used to instantiate the env
    :param n_trials: (int) maximum number of trials for finding the best hyperparams
    :param n_timesteps: (int) maximum number of timesteps per trial
    :param hyperparams: (dict)
    :param n_jobs: (int) number of parallel jobs
    :param sampler_method: (str)
    :param pruner_method: (str)
    :param n_startup_trials: (int)
    :param n_evaluations: (int) Evaluate every 20th of the maximum budget per iteration
    :param n_eval_episodes: (int) Evaluate the model during 5 episodes
    :param seed: (int)
    :param verbose: (int)
    :return: (pd.Dataframe) detailed result of the optimization
    """
    # TODO: eval each hyperparams several times to account for noisy evaluation
    if hyperparams is None:
        hyperparams = {}

    eval_freq = int(n_timesteps / n_evaluations)

    # n_warmup_steps: Disable pruner until the trial reaches the given number of step.
    if sampler_method == 'random':
        sampler = RandomSampler(seed=seed)
    elif sampler_method == 'tpe':
        sampler = TPESampler(n_startup_trials=n_startup_trials, seed=seed)
    elif sampler_method == 'skopt':
        # cf https://scikit-optimize.github.io/#skopt.Optimizer
        # GP: gaussian process
        # Gradient boosted regression: GBRT
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP", 'acq_func': 'gp_hedge'})
    else:
        raise ValueError(f'Unknown sampler: {sampler_method}')

    if pruner_method == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    elif pruner_method == 'median':
        pruner = MedianPruner(n_startup_trials=n_startup_trials, n_warmup_steps=n_evaluations // 3)
    elif pruner_method == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials, n_warmup_steps=n_evaluations)
    else:
        raise ValueError(f'Unknown pruner: {pruner_method}')

    if verbose > 0:
        print(f"Sampler: {sampler_method} - Pruner: {pruner_method}")

    study = optuna.create_study(sampler=sampler, pruner=pruner)
    algo_sampler = HYPERPARAMS_SAMPLER[algo]

    def objective(trial):
        kwargs = hyperparams.copy()

        trial.model_class = None
        if algo == 'her':
            trial.model_class = hyperparams['model_class']

        # Hack to use DDPG/TD3 noise sampler
        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
            trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
        kwargs.update(algo_sampler(trial))

        model = model_fn(**kwargs)
        model.trial = trial

        eval_env = env_fn(n_envs=1, eval_env=True)
        # Account for parallel envs
        eval_freq_ = max(eval_freq // model.get_env().num_envs, 1)
        # TODO: use non-deterministic eval for Atari?
        eval_callback = TrialEvalCallback(eval_env, trial, n_eval_episodes=n_eval_episodes,
                                          eval_freq=eval_freq_, deterministic=True)

        try:
            model.learn(n_timesteps, callback=eval_callback)
            # Free memory
            model.env.close()
            eval_env.close()
        except AssertionError as e:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            eval_env.close()
            # Prune hyperparams that generate NaNs
            print(e)
            raise optuna.exceptions.TrialPruned()
        is_pruned = eval_callback.is_pruned
        cost = -1 * eval_callback.last_mean_reward

        del model.env, eval_env
        del model

        if is_pruned:
            raise optuna.exceptions.TrialPruned()

        return cost

    try:
        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print(f'    {key}: {value}')

    return study.trials_dataframe()
def run(
    self,
    true,
    space,
    init_state=None,
    objective_fn=None,
    n=100,
    early_stopping=None,
    timeout=None,
    constraint=None,
    callbacks=None,
    show_progress_bar=True,
    n_jobs=1,
    info=None,
    save=True,
    filename=None,
):
    # Verify n_jobs is equal to 1
    assert n_jobs == 1, "Parallel optimization is not implemented yet"

    # Disable default logging of Optuna
    optuna.logging.disable_default_handler()

    # Prepare Optuna objective function
    if objective_fn is None:
        objective_fn = lambda params: self.model.objective(
            true, params, init_state=init_state, constraint=constraint)

    def objective(trial):
        params = self._sample(trial, space)
        return objective_fn(params)

    # Create Optuna study
    # Possibility here to change sampler and pruner
    sampler = TPESampler()      # note: defined but not passed to create_study below
    pruner = HyperbandPruner()  # note: defined but not passed to create_study below
    self.study = optuna.create_study(direction="minimize",
                                     pruner=SuccessiveHalvingPruner(),
                                     sampler=TPESampler())

    # Create callback
    pbar = tqdm(range(0, n), desc="Parameters Optimization")

    def custom_callback(study, trial):
        # Message
        pbar.set_postfix({"value": trial.value, "best_value": study.best_value})
        pbar.update()
        # Early stopping
        if early_stopping is not None:
            if trial.number - study.best_trial.number > early_stopping:
                raise EarlyStoppingError("Stopping")

    # Run optimizer to find best parameters
    # The try/except block allows early stopping when the best value has not changed
    # for a given number of trials
    try:
        if callbacks is None:
            callbacks = []
        self.study.optimize(
            objective,
            n_trials=n,
            n_jobs=n_jobs,
            show_progress_bar=False,
            timeout=timeout,
            gc_after_trial=False,  # is it accelerating computation?
            callbacks=[custom_callback] + callbacks)
    except EarlyStoppingError:
        print(f"... Early stopping - best value has not changed since {early_stopping} trials "
              f"at {self.study.best_value}")

    # Return best value
    best = self.study.best_params
    print(f"... Found best solution {best} for value {self.study.best_value}")

    # Compute final loss
    loss_dict = self.model.objective(true, best, init_state, return_dict=True)

    if info is None:
        info = {}

    # Save parameters
    if save:
        self.save_params(filename,
                         message="Parameters calibration",
                         info={"on": true.columns.tolist(),
                               "init_state": init_state,
                               **loss_dict,
                               **info})

    return best
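# Aside (not from the snippet above): instead of raising a custom EarlyStoppingError from
# the callback, a callback can also call study.stop(), which lets the running
# Study.optimize() return normally once the current trial finishes. A minimal sketch with
# a hypothetical patience of 20 trials:
import optuna

def early_stopping_callback(study, trial):
    if trial.number - study.best_trial.number > 20:
        study.stop()  # finish gracefully; no exception needed

study = optuna.create_study(direction="minimize")
study.optimize(lambda t: (t.suggest_float("x", -10, 10) - 2) ** 2,
               n_trials=1000, callbacks=[early_stopping_callback])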
def main(args):
    workdir = os.path.expanduser(args.tuning_directory)

    if os.path.exists(workdir) and not args.force:
        print("* error: %s exists." % workdir)
        exit(1)

    os.makedirs(workdir, exist_ok=True)

    init(args.seed, args.device)
    device = torch.device(args.device)

    print("[loading data]")
    train_data = load_data(limit=args.chunks, directory=args.directory)
    if os.path.exists(os.path.join(args.directory, 'validation')):
        valid_data = load_data(directory=os.path.join(args.directory, 'validation'), limit=10000)
    else:
        print("[validation set not found: splitting training set]")
        split = np.floor(len(train_data[0]) * 0.97).astype(np.int32)
        valid_data = [x[split:] for x in train_data]
        train_data = [x[:split] for x in train_data]

    train_loader = DataLoader(ChunkDataSet(*train_data), batch_size=args.batch,
                              shuffle=True, num_workers=4, pin_memory=True)
    test_loader = DataLoader(ChunkDataSet(*valid_data), batch_size=args.batch,
                             num_workers=4, pin_memory=True)

    def objective(trial):

        config = toml.load(args.config)

        lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)

        model = load_symbol(config, 'Model')(config)
        num_params = sum(p.numel() for p in model.parameters())

        print("[trial %s]" % trial.number)

        model.to(args.device)
        model.train()

        os.makedirs(workdir, exist_ok=True)

        scaler = GradScaler(enabled=True)
        optimizer = AdamW(model.parameters(), amsgrad=False, lr=lr)
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)

        if hasattr(model, 'seqdist'):
            criterion = model.seqdist.ctc_loss
        else:
            criterion = None

        lr_scheduler = func_scheduler(
            optimizer, cosine_decay_schedule(1.0, decay), args.epochs * len(train_loader),
            warmup_steps=warmup_steps, warmup_ratio=warmup_ratio,
        )

        for epoch in range(1, args.epochs + 1):

            try:
                train_loss, duration = train(model, device, train_loader, optimizer,
                                             scaler=scaler, use_amp=True, criterion=criterion)
                val_loss, val_mean, val_median = test(model, device, test_loader, criterion=criterion)
                print("[epoch {}] directory={} loss={:.4f} mean_acc={:.3f}% median_acc={:.3f}%".format(
                    epoch, workdir, val_loss, val_mean, val_median))
            except KeyboardInterrupt:
                exit()
            except Exception as e:
                print("[pruned] exception")
                raise optuna.exceptions.TrialPruned()

            if np.isnan(val_loss):
                val_loss = 9.9

            trial.report(val_loss, epoch)

            if trial.should_prune():
                print("[pruned] unpromising")
                raise optuna.exceptions.TrialPruned()

        trial.set_user_attr('val_loss', val_loss)
        trial.set_user_attr('val_mean', val_mean)
        trial.set_user_attr('val_median', val_median)
        trial.set_user_attr('train_loss', train_loss)
        trial.set_user_attr('model_params', num_params)

        torch.save(model.state_dict(), os.path.join(workdir, "weights_%s.tar" % trial.number))
        toml.dump(config, open(os.path.join(workdir, 'config_%s.toml' % trial.number), 'w'))

        print("[loss] %.4f" % val_loss)
        return val_loss

    print("[starting study]")

    optuna.logging.set_verbosity(optuna.logging.WARNING)

    study = optuna.create_study(
        direction='minimize',
        storage='sqlite:///%s' % os.path.join(workdir, 'tune.db'),
        study_name='bonito-study',
        load_if_exists=True,
        pruner=SuccessiveHalvingPruner())

    study.optimize(objective, n_trials=args.trials)
sock.listen(NUM_PAR)


def objective(trial):
    con, addr = sock.accept()
    x = trial.suggest_uniform('x', -10, 10)
    con.sendall(json.dumps(x).encode())
    d = con.recv(1024)
    con.close()
    return json.loads(d)


db_name = 'sqlite:///test.db'
study_name = str(uuid.uuid4())


def func():
    study = optuna.load_study(study_name=study_name, storage=db_name)
    study.optimize(objective, n_trials=10 // NUM_PAR)


study = optuna.create_study(study_name=study_name, storage=db_name,
                            pruner=SuccessiveHalvingPruner())

with ThreadPoolExecutor(max_workers=NUM_PAR) as ex:
    for i in range(NUM_PAR):
        ex.submit(func)

study = optuna.load_study(study_name=study_name, storage=db_name)
print(study.best_params)
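# Aside (not part of the snippet above): when all workers live in the same process, the
# same concurrency can be obtained with the n_jobs argument of Study.optimize rather than
# submitting optimize() calls to a ThreadPoolExecutor; an RDB storage is then only needed
# if the study must survive the process or be shared with other processes. A minimal
# sketch with a toy objective:
import optuna
from optuna.pruners import SuccessiveHalvingPruner

def toy_objective(trial):
    x = trial.suggest_float('x', -10, 10)
    return x ** 2

study = optuna.create_study(pruner=SuccessiveHalvingPruner())
study.optimize(toy_objective, n_trials=20, n_jobs=4)  # run trials concurrently
print(study.best_params)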
def optimize(env_id, params, args, session_path, session_id):
    n_trials = args.n_trials
    n_episodes_per_eval = args.n_episodes_per_eval

    seed = int(time())

    if args.sampler == 'random':
        sampler = RandomSampler(seed=seed)
    elif args.sampler == 'tpe':
        sampler = TPESampler(n_startup_trials=5, seed=seed)
    elif args.sampler == 'skopt':
        sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP", 'acq_func': 'gp_hedge'})
    else:
        raise ValueError('Unknown sampler: {}'.format(args.sampler))

    if args.pruner == 'halving':
        pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4, min_early_stopping_rate=0)
    elif args.pruner == 'median':
        pruner = MedianPruner(n_startup_trials=5)
    elif args.pruner == 'none':
        # Do not prune
        pruner = MedianPruner(n_startup_trials=n_trials)
    else:
        raise ValueError('Unknown pruner: {}'.format(args.pruner))

    study_name = args.optimizer_study_name if args.optimizer_study_name else f'{session_id}-optimizer_study'
    storage = f'sqlite:///{study_name}.db' if args.optimizer_use_db else None
    study = optuna.create_study(study_name=study_name, storage=storage, load_if_exists=True,
                                sampler=sampler, pruner=pruner)

    # the objective function called by optuna during each trial
    def objective(trial):

        # copy to preserve original params
        _params = params.copy()
        _params['hyper_params'] = HYPERPARAMS_SAMPLER[args.algorithm.lower()](trial)

        # network architecture
        net_arch = trial.suggest_categorical('net_arch', ['8x8', '16x16', '32x32'])
        layers = map(int, net_arch.split('x'))
        policy_kwargs = dict(act_fun=tf.nn.relu, net_arch=list(layers))

        print(f'*** beginning trial {trial.number}')
        print('\thyper-parameters:')
        for param, value in _params['hyper_params'].items():
            print(f'\t\t{param}:{value}')
        print(f'\t\tnet_arch: {net_arch}')

        _params['save_dir'] = _params['save_dir'] / 'optimizer'

        try:
            # purge any previously saved models
            purge_model(_params, args, interactive=False)

            ######################################################
            # learning phase - on possibly multiple environments #
            ######################################################
            godot_instances = [GodotInstance(o_port, a_port)
                               for o_port, a_port in get_godot_instances(args.n_godot_instances)]
            env = create_env(args, env_id, godot_instances, _params, session_path)
            env = VecCheckNan(env, warn_once=False, raise_exception=True)

            # learn and save model
            model = init_model(session_path, _params, env, args, policy_kwargs=policy_kwargs)
            learn(env, model, _params, args, session_path)
            env.close()

            ##########################################################################
            # evaluation phase - single environment (deterministic action selection) #
            ##########################################################################
            env = create_env(args, env_id, [GODOT_EVAL_INSTANCE], _params, session_path, eval=True)
            env = VecCheckNan(env, warn_once=False, raise_exception=True)

            # load the previously learned model and evaluate it
            model = init_model(session_path, _params, env, args, eval=True)
            mean_reward, _ = evaluate(model, env, args, n_episodes=n_episodes_per_eval)
            env.close()
        except (AssertionError, ValueError) as e:
            print(f'pruning optimizer trial {trial.number} due to exception {e}')
            raise optuna.exceptions.TrialPruned()

        # optuna minimizes the objective by default, so we need to flip the sign to maximize
        cost = -1 * mean_reward
        return cost

    try:
        study.optimize(objective, n_trials)
    except KeyboardInterrupt:
        pass

    print('Number of finished trials: ', len(study.trials))

    print('Best trial:')
    trial = study.best_trial

    print('Value: ', trial.value)

    print('Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    return study.trials_dataframe()