Example #1
    def create_callbacks(self):

        if self.save_freq > 0:
            # Account for the number of parallel environments
            self.save_freq = max(self.save_freq // self.n_envs, 1)
            self.callbacks.append(
                CheckpointCallback(
                    save_freq=self.save_freq,
                    save_path=self.save_path,
                    name_prefix="rl_model",
                    verbose=1,
                )
            )

        # Create test env if needed, do not normalize reward
        if self.eval_freq > 0 and not self.optimize_hyperparameters:
            # Account for the number of parallel environments
            self.eval_freq = max(self.eval_freq // self.n_envs, 1)

            if self.verbose > 0:
                print("Creating test environment")

            save_vec_normalize = SaveVecNormalizeCallback(save_freq=1, save_path=self.params_path)
            eval_callback = MyEvalCallback(
                self.create_envs(1, eval_env=True),
                callback_on_new_best=save_vec_normalize,
                best_model_save_path=self.save_path,
                n_eval_episodes=self.n_eval_episodes,
                log_path=self.save_path,
                eval_freq=self.eval_freq,
                deterministic=self.deterministic_eval,
            )

            self.callbacks.append(eval_callback)
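For context, the callbacks assembled by a method like this are ultimately handed to the agent's learn() call. A minimal, self-contained sketch of that end-to-end wiring, assuming stable_baselines3 v2.x with gymnasium and using an illustrative env id and frequencies:

import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback

# Checkpoint every 5_000 steps; evaluate on a separate env every 2_000 steps.
checkpoint_cb = CheckpointCallback(save_freq=5_000, save_path="./logs/", name_prefix="rl_model", verbose=1)
eval_cb = EvalCallback(
    gym.make("CartPole-v1"),
    best_model_save_path="./logs/",
    log_path="./logs/",
    eval_freq=2_000,
    n_eval_episodes=5,
    deterministic=True,
)

model = PPO("MlpPolicy", "CartPole-v1", verbose=0)
# learn() accepts a single callback or a list of callbacks.
model.learn(total_timesteps=10_000, callback=[checkpoint_cb, eval_cb])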
Example #2
    def create_callbacks(self):

        if self.save_freq > 0:
            # Account for the number of parallel environments
            self.save_freq = max(self.save_freq // self.n_envs, 1)
            self.callbacks.append(
                CheckpointCallback(
                    save_freq=self.save_freq,
                    save_path=self.save_path,
                    name_prefix="rl_model",
                    verbose=1,
                ))

        # Create test env if needed, do not normalize reward
        if self.eval_freq > 0 and not self.optimize_hyperparameters:
            # Account for the number of parallel environments
            self.eval_freq = max(self.eval_freq // self.n_envs, 1)

            if self.verbose > 0:
                print("Creating test environment")

            save_vec_normalize = SaveVecNormalizeCallback(
                save_freq=1, save_path=self.params_path)
            eval_callback = EvalCallback(
                self.create_envs(1, eval_env=True),
                callback_on_new_best=save_vec_normalize,
                best_model_save_path=self.save_path,
                n_eval_episodes=self.n_eval_episodes,
                log_path=self.save_path,
                eval_freq=self.eval_freq,
                deterministic=self.deterministic_eval,
            )

            self.callbacks.append(eval_callback)

        # Project-specific callback that periodically saves environment observations
        from rl.callbacks_and_wrappers import SaveObservationCallback
        frequency = 1e5
        sampling_factor = self.sampling_factor
        obs_shape = (4, 84, 84)
        save_obs_callback = SaveObservationCallback(self.algo,
                                                    self.save_path,
                                                    frequency,
                                                    sampling_factor,
                                                    obs_shape,
                                                    verbose=1,
                                                    delete=True)
        self.callbacks.append(save_obs_callback)
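These examples rely on a SaveVecNormalizeCallback provided by the surrounding project (a callback of the same name also ships with rl_zoo3). A minimal sketch of such a callback, assuming only the standard BaseCallback API; the class name is illustrative:

import os

from stable_baselines3.common.callbacks import BaseCallback


class SaveVecNormalizeSketch(BaseCallback):
    """Save the VecNormalize statistics every `save_freq` calls."""

    def __init__(self, save_freq: int, save_path: str, verbose: int = 0):
        super().__init__(verbose)
        self.save_freq = save_freq
        self.save_path = save_path

    def _init_callback(self) -> None:
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.n_calls % self.save_freq == 0:
            vec_normalize = self.model.get_vec_normalize_env()
            if vec_normalize is not None:
                vec_normalize.save(os.path.join(self.save_path, "vecnormalize.pkl"))
        return True

Used as callback_on_new_best with save_freq=1, as in the examples above, such a callback writes the normalization statistics each time the evaluation callback finds a new best model.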
Example #3
                    DummyVecEnv([lambda: gym.make(env_id, t_final=2000)])
                    for _ in range(4))
                eval_callback = CustomRansimCallback(
                    eval_env_list,
                    best_model_save_path=save_path,
                    log_path=save_path,
                    eval_freq=1000000,
                    n_eval_episodes=1,
                    deterministic=False,
                    render=False,
                    plot_results=True)
            else:
                if args.verbose > 0:
                    print("Creating test environment")

                save_vec_normalize = SaveVecNormalizeCallback(
                    save_freq=1, save_path=params_path)
                eval_callback = EvalCallback(
                    create_env(1, eval_env=True),
                    callback_on_new_best=save_vec_normalize,
                    best_model_save_path=save_path,
                    n_eval_episodes=args.eval_episodes,
                    log_path=save_path,
                    eval_freq=args.eval_freq,
                    deterministic=not is_atari)
            callbacks.append(eval_callback)

            # Restore original kwargs
            if old_kwargs is not None:
                normalize_kwargs = old_kwargs.copy()

    # TODO: check for hyperparameters optimization
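The statistics written by SaveVecNormalizeCallback are only useful if they are reloaded before evaluating or resuming the saved model. A short sketch of that reloading step, assuming the callback wrote vecnormalize.pkl and EvalCallback wrote best_model.zip into a logs/ folder (paths and env id are illustrative):

import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

stats_path = "logs/vecnormalize.pkl"   # written by the VecNormalize-saving callback
model_path = "logs/best_model.zip"     # written by EvalCallback

eval_env = DummyVecEnv([lambda: gym.make("Pendulum-v1")])
eval_env = VecNormalize.load(stats_path, eval_env)
eval_env.training = False     # freeze the running statistics at test time
eval_env.norm_reward = False  # report raw, unnormalized rewards

model = PPO.load(model_path, env=eval_env)
obs = eval_env.reset()
for _ in range(1_000):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = eval_env.step(action)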
Example #4
    def create_callbacks(self, hyperparams):
        if self.wandb_logging:
            # Create wandb callback
            config = {"env_name": self.env_id, "log_path": self.save_path}
            config.update(hyperparams)
            config.update(hyperparams["policy_kwargs"])
            config.update(self.env_kwargs)
            config.pop("policy_kwargs")
            sync_tensorboard = self.tensorboard_log != ""
            self.wandb_run = wandb.init(
                project="sb3",
                config=config,
                sync_tensorboard=sync_tensorboard,  # auto-upload sb3's tensorboard metrics
                monitor_gym=True,  # auto-upload the videos of agents playing the game
                save_code=False,  # optional
            )
            self.callbacks.append(
                WandbCallback(
                    gradient_save_freq=0,
                    #model_save_path=f"models/{self.wandb_run.id}",
                    #model_save_freq=1000000,
                    verbose=2,
                ))

        if self.save_freq > 0:
            # Account for the number of parallel environments
            self.save_freq = max(self.save_freq // self.n_envs, 1)
            self.callbacks.append(
                CheckpointCallback(
                    save_freq=self.save_freq,
                    save_path=self.save_path,
                    name_prefix="rl_model",
                    verbose=1,
                ))

        # Create test env if needed, do not normalize reward
        if self.eval_freq > 0 and not self.optimize_hyperparameters:
            # Account for the number of parallel environments
            self.eval_freq = max(self.eval_freq // self.n_envs, 1)

            if self.verbose > 0:
                print("Creating test environment")

            save_vec_normalize = SaveVecNormalizeCallback(
                save_freq=1, save_path=self.params_path)
            eval_envs = self.create_envs(self.n_eval_envs, eval_env=True)
            if self.eval_video_length is not None and self.eval_video_length > 0:
                # Add a video recorder that starts at each new evaluation and runs for
                # eval_video_length steps or until the evaluation finishes
                eval_envs = VecVideoRecorder(
                    eval_envs,
                    f"videos/{self.wandb_run.id}",
                    record_video_trigger=lambda x: False,
                    video_length=self.eval_video_length)
            eval_callback = EvalCallback(
                eval_envs,
                callback_on_new_best=save_vec_normalize,
                best_model_save_path=self.save_path,
                n_eval_episodes=self.n_eval_episodes,
                log_path=self.save_path,
                eval_freq=self.eval_freq,
                deterministic=self.deterministic_eval,
            )
            self.callbacks.append(eval_callback)
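The wandb pieces used in Example #4 (wandb.init with sync_tensorboard, WandbCallback, and VecVideoRecorder) can also be wired up in a few lines outside of a manager class. A minimal sketch, with an illustrative env id, recording frequency, and timestep budget:

import gymnasium as gym
import wandb
from wandb.integration.sb3 import WandbCallback

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder

run = wandb.init(project="sb3", sync_tensorboard=True, monitor_gym=True)

env = DummyVecEnv([lambda: gym.make("CartPole-v1", render_mode="rgb_array")])
# Here the trigger records a short clip every 4_000 steps; Example #4 instead disables
# the step-based trigger and relies on the evaluation loop to start recording.
env = VecVideoRecorder(env, f"videos/{run.id}",
                       record_video_trigger=lambda step: step % 4_000 == 0,
                       video_length=200)

model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=f"runs/{run.id}")
model.learn(total_timesteps=10_000, callback=WandbCallback(verbose=2))
run.finish()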