def create_callbacks(self):
    if self.save_freq > 0:
        # Account for the number of parallel environments
        self.save_freq = max(self.save_freq // self.n_envs, 1)
        self.callbacks.append(
            CheckpointCallback(
                save_freq=self.save_freq,
                save_path=self.save_path,
                name_prefix="rl_model",
                verbose=1,
            )
        )

    # Create test env if needed, do not normalize reward
    if self.eval_freq > 0 and not self.optimize_hyperparameters:
        # Account for the number of parallel environments
        self.eval_freq = max(self.eval_freq // self.n_envs, 1)
        if self.verbose > 0:
            print("Creating test environment")

        save_vec_normalize = SaveVecNormalizeCallback(save_freq=1, save_path=self.params_path)
        eval_callback = MyEvalCallback(
            self.create_envs(1, eval_env=True),
            callback_on_new_best=save_vec_normalize,
            best_model_save_path=self.save_path,
            n_eval_episodes=self.n_eval_episodes,
            log_path=self.save_path,
            eval_freq=self.eval_freq,
            deterministic=self.deterministic_eval,
        )
        self.callbacks.append(eval_callback)
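# Each of the create_callbacks variants in this section wires a SaveVecNormalizeCallback into the
# evaluation callback via callback_on_new_best, so that the VecNormalize running statistics are
# written out whenever a new best model is found. The class itself is not shown in these snippets;
# the following is a minimal sketch assuming the stable_baselines3 BaseCallback API, not the exact
# implementation used by these scripts:
import os

from stable_baselines3.common.callbacks import BaseCallback


class SaveVecNormalizeCallback(BaseCallback):
    """Save the VecNormalize statistics every ``save_freq`` calls (sketch)."""

    def __init__(self, save_freq: int, save_path: str, name_prefix: str = None, verbose: int = 0):
        super().__init__(verbose)
        self.save_freq = save_freq
        self.save_path = save_path
        self.name_prefix = name_prefix

    def _init_callback(self) -> None:
        # Create the target folder if needed
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.n_calls % self.save_freq == 0:
            if self.name_prefix is not None:
                path = os.path.join(self.save_path, f"{self.name_prefix}_{self.num_timesteps}_steps.pkl")
            else:
                path = os.path.join(self.save_path, "vecnormalize.pkl")
            # get_vec_normalize_env() returns the VecNormalize wrapper if the training env uses one
            vec_normalize = self.model.get_vec_normalize_env()
            if vec_normalize is not None:
                vec_normalize.save(path)
                if self.verbose > 0:
                    print(f"Saving VecNormalize to {path}")
        return True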
def create_callbacks(self):
    if self.save_freq > 0:
        # Account for the number of parallel environments
        self.save_freq = max(self.save_freq // self.n_envs, 1)
        self.callbacks.append(
            CheckpointCallback(
                save_freq=self.save_freq,
                save_path=self.save_path,
                name_prefix="rl_model",
                verbose=1,
            ))

    # Create test env if needed, do not normalize reward
    if self.eval_freq > 0 and not self.optimize_hyperparameters:
        # Account for the number of parallel environments
        self.eval_freq = max(self.eval_freq // self.n_envs, 1)
        if self.verbose > 0:
            print("Creating test environment")

        save_vec_normalize = SaveVecNormalizeCallback(save_freq=1, save_path=self.params_path)
        eval_callback = EvalCallback(
            self.create_envs(1, eval_env=True),
            callback_on_new_best=save_vec_normalize,
            best_model_save_path=self.save_path,
            n_eval_episodes=self.n_eval_episodes,
            log_path=self.save_path,
            eval_freq=self.eval_freq,
            deterministic=self.deterministic_eval,
        )
        self.callbacks.append(eval_callback)

    from rl.callbacks_and_wrappers import SaveObservationCallback

    frequency = 1e5
    sampling_factor = self.sampling_factor
    obs_shape = (4, 84, 84)
    save_obs_callback = SaveObservationCallback(
        self.algo, self.save_path, frequency, sampling_factor, obs_shape, verbose=1, delete=True
    )
    self.callbacks.append(save_obs_callback)
            DummyVecEnv([lambda: gym.make(env_id, t_final=2000)]) for _ in range(4))
    eval_callback = CustomRansimCallback(
        eval_env_list,
        best_model_save_path=save_path,
        log_path=save_path,
        eval_freq=1000000,
        n_eval_episodes=1,
        deterministic=False,
        render=False,
        plot_results=True)
else:
    if args.verbose > 0:
        print("Creating test environment")
    save_vec_normalize = SaveVecNormalizeCallback(save_freq=1, save_path=params_path)
    eval_callback = EvalCallback(
        create_env(1, eval_env=True),
        callback_on_new_best=save_vec_normalize,
        best_model_save_path=save_path,
        n_eval_episodes=args.eval_episodes,
        log_path=save_path,
        eval_freq=args.eval_freq,
        deterministic=not is_atari)
callbacks.append(eval_callback)

# Restore original kwargs
if old_kwargs is not None:
    normalize_kwargs = old_kwargs.copy()

# TODO: check for hyperparameters optimization
def create_callbacks(self, hyperparams):
    if self.wandb_logging:
        # Create wandb callback
        config = {"env_name": self.env_id, "log_path": self.save_path}
        config.update(hyperparams)
        config.update(hyperparams["policy_kwargs"])
        config.update(self.env_kwargs)
        config.pop("policy_kwargs")
        sync_tensorboard = self.tensorboard_log != ""
        self.wandb_run = wandb.init(
            project="sb3",
            config=config,
            sync_tensorboard=sync_tensorboard,  # auto-upload sb3's tensorboard metrics
            monitor_gym=True,  # auto-upload the videos of agents playing the game
            save_code=False,  # optional
        )
        self.callbacks.append(
            WandbCallback(
                gradient_save_freq=0,
                # model_save_path=f"models/{self.wandb_run.id}",
                # model_save_freq=1000000,
                verbose=2,
            ))

    if self.save_freq > 0:
        # Account for the number of parallel environments
        self.save_freq = max(self.save_freq // self.n_envs, 1)
        self.callbacks.append(
            CheckpointCallback(
                save_freq=self.save_freq,
                save_path=self.save_path,
                name_prefix="rl_model",
                verbose=1,
            ))

    # Create test env if needed, do not normalize reward
    if self.eval_freq > 0 and not self.optimize_hyperparameters:
        # Account for the number of parallel environments
        self.eval_freq = max(self.eval_freq // self.n_envs, 1)
        if self.verbose > 0:
            print("Creating test environment")

        save_vec_normalize = SaveVecNormalizeCallback(save_freq=1, save_path=self.params_path)
        eval_envs = self.create_envs(self.n_eval_envs, eval_env=True)
        if self.eval_video_length is not None and self.eval_video_length > 0:
            # Add a video recorder which will start at each new evaluation and run for
            # eval_video_length steps or until the evaluation is done
            eval_envs = VecVideoRecorder(
                eval_envs,
                f"videos/{self.wandb_run.id}",
                record_video_trigger=lambda x: False,
                video_length=self.eval_video_length,
            )
        eval_callback = EvalCallback(
            eval_envs,
            callback_on_new_best=save_vec_normalize,
            best_model_save_path=self.save_path,
            n_eval_episodes=self.n_eval_episodes,
            log_path=self.save_path,
            eval_freq=self.eval_freq,
            deterministic=self.deterministic_eval,
        )
        self.callbacks.append(eval_callback)
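# Usage sketch: after create_callbacks() has populated self.callbacks, the list is passed to
# model.learn(), which wraps it in a CallbackList. The trainer attributes used below
# (self.model, self.n_timesteps) are illustrative assumptions, not names taken from the
# snippets above:
import os


def learn(self, hyperparams):
    self.create_callbacks(hyperparams)
    self.model.learn(
        total_timesteps=self.n_timesteps,
        callback=self.callbacks,  # stable-baselines3 accepts a single callback or a list
    )
    # Save the final model next to the checkpoints written by CheckpointCallback
    self.model.save(os.path.join(self.save_path, "final_model"))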