def _preprocess_hyperparams(
    self, hyperparams: Dict[str, Any]
) -> Tuple[Dict[str, Any], Optional[Callable], List[BaseCallback]]:
    """Prepare the loaded hyperparameters: convert strings to objects,
    extract the env wrapper and callbacks, and strip keys that the
    model constructor does not accept."""
    self.n_envs = hyperparams.get("n_envs", 1)

    if self.verbose > 0:
        print(f"Using {self.n_envs} environments")

    # Convert schedule strings to objects
    hyperparams = self._preprocess_schedules(hyperparams)

    # Pre-process train_freq
    if "train_freq" in hyperparams and isinstance(hyperparams["train_freq"], list):
        hyperparams["train_freq"] = tuple(hyperparams["train_freq"])

    # Should we overwrite the number of timesteps?
    if self.n_timesteps > 0:
        if self.verbose:
            print(f"Overwriting n_timesteps with n={self.n_timesteps}")
    else:
        self.n_timesteps = int(hyperparams["n_timesteps"])

    # Pre-process normalize config
    hyperparams = self._preprocess_normalization(hyperparams)

    # Pre-process policy/buffer keyword arguments:
    # convert to python objects if needed
    for kwargs_key in {"policy_kwargs", "replay_buffer_class", "replay_buffer_kwargs"}:
        if kwargs_key in hyperparams.keys() and isinstance(hyperparams[kwargs_key], str):
            hyperparams[kwargs_key] = eval(hyperparams[kwargs_key])

    # Delete keys so the dict can be passed to the model constructor
    if "n_envs" in hyperparams.keys():
        del hyperparams["n_envs"]
    del hyperparams["n_timesteps"]

    if "frame_stack" in hyperparams.keys():
        self.frame_stack = hyperparams["frame_stack"]
        del hyperparams["frame_stack"]

    # Obtain a class object from a wrapper name string in hyperparams
    # and delete the entry
    env_wrapper = get_wrapper_class(hyperparams)
    if "env_wrapper" in hyperparams.keys():
        del hyperparams["env_wrapper"]

    callbacks = get_callback_list(hyperparams)
    if "callback" in hyperparams.keys():
        del hyperparams["callback"]

    return hyperparams, env_wrapper, callbacks
def _preprocess_hyperparams(
    self, hyperparams: Dict[str, Any]
) -> Tuple[Dict[str, Any], Optional[Callable], List[BaseCallback]]:
    """Prepare the loaded hyperparameters: convert strings to objects,
    extract the env wrapper and callbacks, and strip keys that the
    model constructor does not accept."""
    self.n_envs = hyperparams.get("n_envs", 1)

    if self.verbose > 0:
        print(f"Using {self.n_envs} environments")

    # Convert model class string to an object if needed (when using HER)
    hyperparams = self._preprocess_her_model_class(hyperparams)
    hyperparams = self._preprocess_schedules(hyperparams)

    # Should we overwrite the number of timesteps?
    if self.n_timesteps > 0:
        if self.verbose:
            print(f"Overwriting n_timesteps with n={self.n_timesteps}")
    else:
        self.n_timesteps = int(hyperparams["n_timesteps"])

    # Pre-process normalize config
    hyperparams = self._preprocess_normalization(hyperparams)

    # Pre-process policy keyword arguments
    if "policy_kwargs" in hyperparams.keys():
        # Convert to python object if needed
        if isinstance(hyperparams["policy_kwargs"], str):
            hyperparams["policy_kwargs"] = eval(hyperparams["policy_kwargs"])

    # Delete keys so the dict can be passed to the model constructor
    if "n_envs" in hyperparams.keys():
        del hyperparams["n_envs"]
    del hyperparams["n_timesteps"]

    if "frame_stack" in hyperparams.keys():
        self.frame_stack = hyperparams["frame_stack"]
        del hyperparams["frame_stack"]

    # Obtain a class object from a wrapper name string in hyperparams
    # and delete the entry
    env_wrapper = get_wrapper_class(hyperparams)
    if "env_wrapper" in hyperparams.keys():
        del hyperparams["env_wrapper"]

    callbacks = get_callback_list(hyperparams)
    if "callback" in hyperparams.keys():
        del hyperparams["callback"]

    return hyperparams, env_wrapper, callbacks
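# A minimal usage sketch of how the tuple returned by
# _preprocess_hyperparams() is typically consumed: the cleaned dict feeds
# the model constructor, the wrapper class is applied when the envs are
# built, and the callbacks go to learn(). `manager` and
# `_example_preprocess_usage` are hypothetical names for illustration,
# assuming an ExperimentManager-like object that owns these methods.
def _example_preprocess_usage(manager, raw_hyperparams):
    hyperparams, env_wrapper, callbacks = manager._preprocess_hyperparams(raw_hyperparams)
    # env_wrapper is the class object recovered from the "env_wrapper"
    # config string; env creation is expected to apply it internally.
    env = manager.create_envs(manager.n_envs, no_log=True)
    # hyperparams no longer contains n_envs/n_timesteps/frame_stack,
    # so it can be splatted directly into the algorithm constructor.
    model = ALGOS[manager.algo](env=env, **hyperparams)
    model.learn(manager.n_timesteps, callback=callbacks)
    return model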
def objective(self, trial: optuna.Trial) -> float:
    """Optuna objective: sample candidate hyperparameters, train a model,
    and return the last mean evaluation reward, pruning bad trials."""
    kwargs = self._hyperparams.copy()

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self.n_actions
    # Hack when using HerReplayBuffer
    trial.using_her_replay_buffer = kwargs.get("replay_buffer_class") == HerReplayBuffer
    if trial.using_her_replay_buffer:
        trial.her_kwargs = kwargs.get("replay_buffer_kwargs", {})

    # Sample candidate hyperparameters
    sampled_hyperparams = HYPERPARAMS_SAMPLER[self.algo](trial)
    kwargs.update(sampled_hyperparams)

    model = ALGOS[self.algo](
        env=self.create_envs(self.n_envs, no_log=True),
        tensorboard_log=None,
        # We do not seed the trial
        seed=None,
        verbose=0,
        **kwargs,
    )

    model.trial = trial

    eval_env = self.create_envs(n_envs=self.n_eval_envs, eval_env=True)

    optuna_eval_freq = int(self.n_timesteps / self.n_evaluations)
    # Account for parallel envs
    optuna_eval_freq = max(optuna_eval_freq // model.get_env().num_envs, 1)

    # Use non-deterministic eval for Atari
    path = None
    if self.optimization_log_path is not None:
        path = os.path.join(self.optimization_log_path, f"trial_{str(trial.number)}")
    callbacks = get_callback_list({"callback": self.specified_callbacks})
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        best_model_save_path=path,
        log_path=path,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=optuna_eval_freq,
        deterministic=self.deterministic_eval,
    )
    callbacks.append(eval_callback)

    try:
        model.learn(self.n_timesteps, callback=callbacks)
        # Free memory
        model.env.close()
        eval_env.close()
    except (AssertionError, ValueError) as e:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        model.env.close()
        eval_env.close()
        # Prune hyperparams that generate NaNs
        print(e)
        print("============")
        print("Sampled hyperparams:")
        pprint(sampled_hyperparams)
        raise optuna.exceptions.TrialPruned()

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()

    return reward
def objective(self, trial: optuna.Trial) -> float:
    """Optuna objective: sample candidate hyperparameters, train a model,
    and return the last mean evaluation reward, pruning bad trials."""
    kwargs = self._hyperparams.copy()

    # Hack to use DDPG/TD3 noise sampler
    trial.n_actions = self.n_actions
    # Hack when using HerReplayBuffer
    trial.using_her_replay_buffer = kwargs.get("replay_buffer_class") == HerReplayBuffer
    if trial.using_her_replay_buffer:
        trial.her_kwargs = kwargs.get("replay_buffer_kwargs", {})

    # Sample candidate hyperparameters
    sampled_hyperparams = HYPERPARAMS_SAMPLER[self.algo](trial, self.n_envs)
    kwargs.update(sampled_hyperparams)

    n_envs = 1 if self.algo == "ars" else self.n_envs
    env = self.create_envs(n_envs, no_log=True)

    # By default, do not activate verbose output to keep
    # stdout clean with only the trial results
    trial_verbosity = 0
    # Activate verbose mode for the trial in debug mode
    # See PR #214
    if self.verbose >= 2:
        trial_verbosity = self.verbose

    model = ALGOS[self.algo](
        env=env,
        tensorboard_log=None,
        # We do not seed the trial
        seed=None,
        verbose=trial_verbosity,
        device=self.device,
        **kwargs,
    )

    eval_env = self.create_envs(n_envs=self.n_eval_envs, eval_env=True)

    optuna_eval_freq = int(self.n_timesteps / self.n_evaluations)
    # Account for parallel envs
    optuna_eval_freq = max(optuna_eval_freq // self.n_envs, 1)

    # Use non-deterministic eval for Atari
    path = None
    if self.optimization_log_path is not None:
        path = os.path.join(self.optimization_log_path, f"trial_{str(trial.number)}")
    callbacks = get_callback_list({"callback": self.specified_callbacks})
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        best_model_save_path=path,
        log_path=path,
        n_eval_episodes=self.n_eval_episodes,
        eval_freq=optuna_eval_freq,
        deterministic=self.deterministic_eval,
    )
    callbacks.append(eval_callback)

    learn_kwargs = {}
    # Special case for ARS
    if self.algo == "ars" and self.n_envs > 1:
        learn_kwargs["async_eval"] = AsyncEval(
            [lambda: self.create_envs(n_envs=1, no_log=True) for _ in range(self.n_envs)],
            model.policy,
        )

    try:
        model.learn(self.n_timesteps, callback=callbacks, **learn_kwargs)
        # Free memory
        model.env.close()
        eval_env.close()
    except (AssertionError, ValueError, EOFError, BrokenPipeError) as e:
        # Sometimes, random hyperparams can generate NaN
        # Free memory
        # If using SubprocVecEnv, the following close() calls can
        # produce errors themselves, so guard them
        traceback.print_exc()
        try:
            model.env.close()
        except Exception:
            pass
        try:
            eval_env.close()
        except Exception:
            pass
        # Prune hyperparams that generate NaNs
        print(e)
        print("============")
        print("Sampled hyperparams:")
        pprint(sampled_hyperparams)
        raise optuna.exceptions.TrialPruned()

    is_pruned = eval_callback.is_pruned
    reward = eval_callback.last_mean_reward

    del model.env, eval_env
    del model

    if is_pruned:
        raise optuna.exceptions.TrialPruned()

    return reward
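# A minimal sketch of how objective() is typically wired into an Optuna
# study. `manager` and `_example_hyperparam_search` are hypothetical names
# for illustration, assuming an ExperimentManager-like object; the sampler
# and pruner below are illustrative defaults, not necessarily what the
# surrounding code configures.
def _example_hyperparam_search(manager, n_trials=10):
    study = optuna.create_study(
        sampler=optuna.samplers.TPESampler(),
        pruner=optuna.pruners.MedianPruner(),
        # objective() returns the last mean evaluation reward, so maximize
        direction="maximize",
    )
    # Trials that raise optuna.exceptions.TrialPruned inside objective()
    # are recorded as PRUNED instead of failing the whole study.
    study.optimize(manager.objective, n_trials=n_trials)
    return study.best_trial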