Example #1
def main(config: str, agent: str):
    with open(config) as fp:
        json_data = json.load(fp)

    config = GameConfig.deserialize(json_data)
    log_dir = config.agents_config[agent]["save_path"]
    if agent == "DQN":
        env = make_atari_env(config.game_name, n_envs=1,
                             seed=0, monitor_dir=log_dir)

    elif agent == "PPO":
        env = make_atari_env(config.game_name, n_envs=8,
                             seed=0, monitor_dir=log_dir)

    else:
        env = make_atari_env(config.game_name, n_envs=16,
                             seed=0, monitor_dir=log_dir)

    env = VecFrameStack(env, n_stack=4)

    agent = AgentLoader.get_agent(agent, config.agents_config, env)

    reward_callback = SaveOnBestTrainingRewardCallback(
        check_freq=100, log_dir=log_dir)

    start_time = time.time()
    steps = 10_000_000
    with ProgressBarManager_new(steps) as progress_callback:
        agent.agent.learn(total_timesteps=steps, callback=[
                          reward_callback, progress_callback])
        # agent.save()
        env.close()

    elapsed_time = time.time() - start_time
    print(f"\nTraining took {elapsed_time:.2f} seconds")
Example #2
def test_sync_vec_normalize():
    env = DummyVecEnv([make_env])

    assert unwrap_vec_normalize(env) is None

    env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)

    assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    env = VecFrameStack(env, 1)

    assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    eval_env = DummyVecEnv([make_env])
    eval_env = VecNormalize(eval_env, training=False, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)
    eval_env = VecFrameStack(eval_env, 1)

    env.reset()
    # Initialize running mean
    for _ in range(100):
        env.step([env.action_space.sample()])

    obs = env.reset()
    original_obs = env.get_original_obs()
    dummy_rewards = np.random.rand(10)
    # Normalization must be different
    assert not np.allclose(obs, eval_env.normalize_obs(original_obs))

    sync_envs_normalization(env, eval_env)

    # Now they must be synced
    assert np.allclose(obs, eval_env.normalize_obs(original_obs))
    assert np.allclose(env.normalize_reward(dummy_rewards), eval_env.normalize_reward(dummy_rewards))
Example #3
def test_sync_vec_normalize(make_env):
    env = DummyVecEnv([make_env])

    assert unwrap_vec_normalize(env) is None

    env = VecNormalize(env,
                       norm_obs=True,
                       norm_reward=True,
                       clip_obs=100.0,
                       clip_reward=100.0)

    assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    if not isinstance(env.observation_space, spaces.Dict):
        env = VecFrameStack(env, 1)
        assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    eval_env = DummyVecEnv([make_env])
    eval_env = VecNormalize(eval_env,
                            training=False,
                            norm_obs=True,
                            norm_reward=True,
                            clip_obs=100.0,
                            clip_reward=100.0)

    if not isinstance(env.observation_space, spaces.Dict):
        eval_env = VecFrameStack(eval_env, 1)

    env.seed(0)
    env.action_space.seed(0)

    env.reset()
    # Initialize running mean
    latest_reward = None
    for _ in range(100):
        _, latest_reward, _, _ = env.step([env.action_space.sample()])

    # Check that unnormalized reward is same as original reward
    original_latest_reward = env.get_original_reward()
    assert np.allclose(original_latest_reward,
                       env.unnormalize_reward(latest_reward))

    obs = env.reset()
    dummy_rewards = np.random.rand(10)
    original_obs = env.get_original_obs()
    # Check that unnormalization works
    assert allclose(original_obs, env.unnormalize_obs(obs))
    # Normalization must be different (between different environments)
    assert not allclose(obs, eval_env.normalize_obs(original_obs))

    # Test syncing of parameters
    sync_envs_normalization(env, eval_env)
    # Now they must be synced
    assert allclose(obs, eval_env.normalize_obs(original_obs))
    assert allclose(env.normalize_reward(dummy_rewards),
                    eval_env.normalize_reward(dummy_rewards))
Example #4
def train_simple_opponent(args):
    env_name = "WimblepongVisualBadAI-v0"
    env = gym.make(env_name)
    #env = ParallelEnvs(env_name, processes=4, envs_per_process=1)
    env = SubprocVecEnv(
        [make_env(env_name, args.seed + i) for i in range(args.num_envs)],
        start_method="spawn")
    env = VecFrameStack(env, n_stack=4)
    if args.algorithm.lower() == "dqn":
        agent = DQNagent.Agent(env_name, env.observation_space,
                               env.action_space)
    elif args.algorithm.lower() == "ppo":
        agent = ppo_agent_stack_4.Agent()
        agent.init_memory(args.steps_per_env, args.num_envs)
        agent.is_training = True
        if args.checkpoint:
            agent.load_checkpoint()
        elif args.pretrained_model:
            agent.load_model()
    else:
        raise NotImplementedError(
            f"No such algorithm: {args.algorithm.lower()}")

    train(env, agent, args)
    agent.save_policy()
    env.close()
Example #5
def make_env(seed: int,
             n_envs: int,
             run_dir: str,
             frame_skip: int,
             frame_stack: int,
             is_eval: bool = False) -> VecEnv:
    """
    Makes vectorized env with required wrappers
    :param seed: Random seed
    :param n_envs: Number of environments to run in parallel
    :param run_dir: Run directory
    :param frame_skip: Skip every nth frame
    :param frame_stack: Stack n frames together
    :param is_eval: True if used for evaluation
    :return: Vectorized env
    """
    if n_envs == 1:
        env = DummyVecEnv([_env_fn(seed, run_dir, frame_skip, is_eval)])
    else:
        env = SubprocVecEnv([
            _env_fn(seed + i, run_dir, frame_skip, is_eval)
            for i in range(n_envs)
        ])
    if frame_stack > 0:
        return VecFrameStack(env, n_stack=frame_stack)
    else:
        return env
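A brief usage sketch for the factory above; the run directory and hyperparameter values are placeholders, and `_env_fn` is assumed to be defined by the surrounding project:
# Hypothetical call sites, values for illustration only
train_env = make_env(seed=0, n_envs=8, run_dir="runs/demo",
                     frame_skip=4, frame_stack=4)
eval_env = make_env(seed=1000, n_envs=1, run_dir="runs/demo",
                    frame_skip=4, frame_stack=4, is_eval=True)
print(train_env.num_envs, eval_env.num_envs)  # 8 1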
Example #6
def test_vec_transpose_skip(tmp_path, model_class):
    # Fake grayscale with frameskip
    env = FakeImageEnv(screen_height=41,
                       screen_width=40,
                       n_channels=10,
                       discrete=model_class not in {SAC, TD3},
                       channel_first=True)
    env = DummyVecEnv([lambda: env])
    # Stack 5 frames so the observation is now (50, 41, 40) but the env is still channel first
    env = VecFrameStack(env, 5, channels_order="first")
    obs_shape_before = env.reset().shape
    # The observation space should be different as the heuristic thinks it is channel last
    assert not np.allclose(obs_shape_before,
                           VecTransposeImage(env).reset().shape)
    env = VecTransposeImage(env, skip=True)
    # The observation space should be the same as we skip the VecTransposeImage
    assert np.allclose(obs_shape_before, env.reset().shape)

    kwargs = dict(
        n_steps=64,
        policy_kwargs=dict(features_extractor_kwargs=dict(features_dim=32)),
        seed=1,
    )
    model = model_class("CnnPolicy", env, **kwargs).learn(250)

    obs = env.reset()
    action, _ = model.predict(obs, deterministic=True)
Example #7
def eval_env_constructor(n_envs=1):
    """
    Evaluation should be in a scalar environment.
    """
    env = make_atari_env("MontezumaRevenge-v0", n_envs=n_envs)
    env = VecFrameStack(env, n_stack=4)
    env = ScalarizeEnvWrapper(env)
    return env
Example #8
def create_environment(config):
    if config.atari_wrapper:
        env = make_atari_env(config.environment, n_envs=config.workers)
        env = VecFrameStack(env, n_stack=1)
    else:
        env = make_vec_env(config.environment, n_envs=config.workers)
    env = DummyEnvWrapper(env, config.add_stoch)
    return env
Example #9
    def create_envs(self,
                    n_envs: int,
                    eval_env: bool = False,
                    no_log: bool = False) -> VecEnv:
        """
        Create the environment and wrap it if necessary.

        :param n_envs:
        :param eval_env: Whether it is an environment used for evaluation or not
        :param no_log: Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: the vectorized environment, with appropriate wrappers
        """
        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else self.save_path

        # env = SubprocVecEnv([make_env(env_id, i, self.seed) for i in range(n_envs)])
        # On most env, SubprocVecEnv does not help and is quite memory hungry
        env = make_vec_env(
            env_id=self.env_id,
            n_envs=n_envs,
            seed=self.seed,
            env_kwargs=self.env_kwargs,
            monitor_dir=log_dir,
            wrapper_class=self.env_wrapper,
            vec_env_cls=self.vec_env_class,
            vec_env_kwargs=self.vec_env_kwargs,
        )

        # Special case for GoalEnvs: log success rate too
        if "Neck" in self.env_id or self.is_robotics_env(self.env_id):
            self._log_success_rate(env)

        # Wrap the env into a VecNormalize wrapper if needed
        # and load saved statistics when present
        env = self._maybe_normalize(env, eval_env)

        # Optional Frame-stacking
        if self.frame_stack is not None:
            n_stack = self.frame_stack
            env = VecFrameStack(env, n_stack)
            if self.verbose > 0:
                print(f"Stacking {n_stack} frames")

        # Wrap if needed to re-order channels
        # (switch from channel last to channel first convention)
        if is_image_space(env.observation_space):
            if self.verbose > 0:
                print("Wrapping into a VecTransposeImage")
            env = VecTransposeImage(env)

        # check if wrapper for dict support is needed
        if self.algo == "her":
            if self.verbose > 0:
                print("Wrapping into a ObsDictWrapper")
            env = ObsDictWrapper(env)

        return env
Example #10
def atari_make(env_name, scalarize=True, **kwargs):
    from stable_baselines3.common.env_util import make_atari_env
    from stable_baselines3.common.vec_env import VecFrameStack
    env = make_atari_env(env_id=env_name, **kwargs)
    env = VecFrameStack(env, n_stack=4)
    if scalarize:
        from rlberry.wrappers.scalarize import ScalarizeEnvWrapper
        env = ScalarizeEnvWrapper(env)
    return env
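A short usage sketch, assuming the stable-baselines3 and rlberry imports inside the function resolve; the env id and keyword arguments are illustrative:
# Vectorized form (extra kwargs are forwarded to make_atari_env)
vec_env = atari_make("PongNoFrameskip-v4", scalarize=False, n_envs=4, seed=0)
# Scalarized form exposes a single-env interface on top of the VecEnv
single_env = atari_make("PongNoFrameskip-v4", scalarize=True, n_envs=1)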
Example #11
def get_env():
    env = make_atari_env(atari_env_name('pong',
                                        'image',
                                        'v4',
                                        no_frame_skip=True),
                         n_envs=4,
                         seed=0)
    env = VecFrameStack(env, n_stack=4)
    return env
Example #12
    def create_env(n_envs, eval_env=False, no_log=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether it is an environment used for evaluation or not
        :param no_log: (bool) Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else save_path

        if n_envs == 1:
            env = SubprocVecEnv(
                [make_env(env_id, 0, args.seed, wrapper_class=env_wrapper, log_dir=log_dir, env_kwargs=env_kwargs)]
            )
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most env, SubprocVecEnv does not help and is quite memory hungry
            env = SubprocVecEnv(
                [
                    make_env(env_id, i, args.seed, log_dir=log_dir, env_kwargs=env_kwargs, wrapper_class=env_wrapper)
                    for i in range(n_envs)
                ]
            )
        if normalize:
            # Copy to avoid changing default values by reference
            local_normalize_kwargs = normalize_kwargs.copy()
            # Do not normalize reward for env used for evaluation
            if eval_env:
                if len(local_normalize_kwargs) > 0:
                    local_normalize_kwargs["norm_reward"] = False
                else:
                    local_normalize_kwargs = {"norm_reward": False}

            if args.verbose > 0:
                if len(local_normalize_kwargs) > 0:
                    print(f"Normalization activated: {local_normalize_kwargs}")
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **local_normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get("frame_stack", False):
            n_stack = hyperparams["frame_stack"]
            env = VecFrameStack(env, n_stack)
            print(f"Stacking {n_stack} frames")

        if is_image_space(env.observation_space):
            if args.verbose > 0:
                print("Wrapping into a VecTransposeImage")
            env = VecTransposeImage(env)
        return env
Example #13
def atari_env(num_envs=1):
    def env_fn():
        env = gym.make("SpaceInvadersNoFrameskip-v4")
        env = AtariWrapper(env)
        return env

    env = DummyVecEnv([env_fn] * num_envs)
    env = VecFrameStack(env, 4)
    env = VecTransposeImage(env)
    env = VecNormalize(env)
    return env
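A quick sanity check of the wrapper chain above (a sketch; the expected shape assumes the standard 84x84 AtariWrapper preprocessing):
env = atari_env(num_envs=2)
obs = env.reset()
# AtariWrapper -> (84, 84, 1), VecFrameStack(4) -> 4 channels,
# VecTransposeImage -> channel-first, VecNormalize -> float observations
print(obs.shape)  # expected: (2, 4, 84, 84)
env.close()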
Example #14
    def create_env(n_envs, eval_env=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether it is an environment used for evaluation or not
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env else save_path

        if n_envs == 1:
            env = DummyVecEnv([
                make_env(env_id,
                         0,
                         args.seed,
                         wrapper_class=env_wrapper,
                         log_dir=log_dir,
                         env_kwargs=env_kwargs)
            ])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most env, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([
                make_env(env_id,
                         i,
                         args.seed,
                         log_dir=log_dir,
                         env_kwargs=env_kwargs,
                         wrapper_class=env_wrapper) for i in range(n_envs)
            ])
        if normalize:
            if args.verbose > 0:
                if len(normalize_kwargs) > 0:
                    print(f"Normalization activated: {normalize_kwargs}")
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print(f"Stacking {n_stack} frames")

        if is_image_space(env.observation_space):
            if args.verbose > 0:
                print("Wrapping into a VecTransposeImage")
            env = VecTransposeImage(env)
        return env
Example #15
def make_env(env_id,
             n_envs,
             frame_stack=True,
             clip_reward=False,
             terminal_on_life_loss=False,
             monitor_dir='./log/monitors',
             vec_env_cls=SubprocVecEnv):

    wrapper_kwargs = {'terminal_on_life_loss': terminal_on_life_loss, 'clip_reward': clip_reward}

    env = make_atari_env(env_id, n_envs, monitor_dir=monitor_dir, vec_env_cls=vec_env_cls, wrapper_kwargs=wrapper_kwargs)
    if frame_stack:
        env = VecFrameStack(env, 4)
    env = VecTransposeImage(env)
    return env
Example #16
def create_vectorized_environment(
        n_envs: int, frame_stack: int,
        env_creation_func: t.Callable) -> VecTransposeImage:
    """Creates a vectorized environment for image-based models.

    :param n_envs: The number of parallel environments to run.
    :param frame_stack: The number of frames to stack in each environment.
    :param env_creation_func: A callable returning a Gym environment.
    :return: A vectorized environment with frame stacking and image transposition.
    """
    return VecTransposeImage(
        VecFrameStack(SubprocVecEnv([env_creation_func] * n_envs),
                      frame_stack))
Example #17
    def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False) -> VecEnv:
        """
        Create the environment and wrap it if necessary.
        :param n_envs:
        :param eval_env: Whether it is an environment used for evaluation or not
        :param no_log: Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: the vectorized environment, with appropriate wrappers
        """
        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else self.save_path

        monitor_kwargs = {}
        # Special case for GoalEnvs: log success rate too
        if "Neck" in self.env_id or self.is_robotics_env(self.env_id) or "parking-v0" in self.env_id:
            monitor_kwargs = dict(info_keywords=("is_success",))

        # On most env, SubprocVecEnv does not help and is quite memory hungry
        # therefore we use DummyVecEnv by default
        env = make_vec_env(
            env_id=self.env_id,
            n_envs=n_envs,
            seed=self.seed,
            env_kwargs=self.env_kwargs,
            monitor_dir=None,                       # Avoid useless monitor file spam from plotting
            wrapper_class=self.env_wrapper,
            vec_env_cls=self.vec_env_class,
            vec_env_kwargs=self.vec_env_kwargs,
            monitor_kwargs=monitor_kwargs,
        )

        # Wrap the env into a VecNormalize wrapper if needed
        # and load saved statistics when present
        env = self._maybe_normalize(env, eval_env)

        # Optional Frame-stacking
        if self.frame_stack is not None:
            n_stack = self.frame_stack
            env = VecFrameStack(env, n_stack)
            if self.verbose > 0:
                print(f"Stacking {n_stack} frames")

        # Wrap if needed to re-order channels
        # (switch from channel last to channel first convention)
        if is_image_space(env.observation_space) and not is_image_space_channels_first(env.observation_space):
            if self.verbose > 0:
                print("Wrapping into a VecTransposeImage")
            env = VecTransposeImage(env)

        return env
Example #18
def test_dict_vec_framestack(model_class, channel_last):
    """
    Additional tests to check observation space support
    for Dictionary spaces and VecEnvWrapper using MultiInputPolicy.
    """
    use_discrete_actions = model_class not in [TQC]
    channels_order = {"vec": None, "img": "last" if channel_last else "first"}
    env = DummyVecEnv([
        lambda: SimpleMultiObsEnv(random_start=True,
                                  discrete_actions=use_discrete_actions,
                                  channel_last=channel_last)
    ])

    env = VecFrameStack(env, n_stack=3, channels_order=channels_order)

    kwargs = {}
    n_steps = 256

    if model_class in {}:
        kwargs = dict(
            n_steps=128,
            policy_kwargs=dict(
                net_arch=[32],
                features_extractor_kwargs=dict(cnn_output_dim=32),
            ),
        )
    else:
        # Avoid memory error when using replay buffer
        # Reduce the size of the features and make learning faster
        kwargs = dict(
            buffer_size=250,
            policy_kwargs=dict(
                net_arch=[32],
                features_extractor_kwargs=dict(cnn_output_dim=32),
                n_quantiles=20,
            ),
            train_freq=8,
            gradient_steps=1,
        )
        if model_class == QRDQN:
            kwargs["learning_starts"] = 0

    model = model_class("MultiInputPolicy", env, gamma=0.5, seed=1, **kwargs)

    model.learn(total_timesteps=n_steps)

    evaluate_policy(model, env, n_eval_episodes=5, warn=False)
Example #19
def make_atari_default(
    env_id: Union[str, Type[gym.Env]],
    n_envs: int = 1,
    seed: Optional[int] = None,
    start_index: int = 0,
    monitor_dir: Optional[str] = None,
    wrapper_kwargs: Optional[Dict[str, Any]] = None,
    env_kwargs: Optional[Dict[str, Any]] = None,
    vec_env_cls: Optional[Type[Union[DummyVecEnv, SubprocVecEnv]]] = DummyVecEnv,
    vec_env_kwargs: Optional[Dict[str, Any]] = None,
) -> VecEnv:
    """
    Create a wrapped, monitored VecEnv for Atari.
    It is a wrapper around ``make_vec_env`` that includes common preprocessing for Atari games.

    :param env_id: the environment ID or the environment class
    :param n_envs: the number of environments you wish to have in parallel
    :param seed: the initial seed for the random number generator
    :param start_index: start rank index
    :param monitor_dir: Path to a folder where the monitor files will be saved.
        If None, no file will be written, however, the env will still be wrapped
        in a Monitor wrapper to provide additional information about training.
    :param wrapper_kwargs: Optional keyword argument to pass to the ``AtariWrapper``
    :param env_kwargs: Optional keyword argument to pass to the env constructor
    :param vec_env_cls: A custom ``VecEnv`` class constructor. Default: ``DummyVecEnv``.
    :param vec_env_kwargs: Keyword arguments to pass to the ``VecEnv`` class constructor.
    :return: The wrapped environment
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def atari_wrapper(env: gym.Env) -> gym.Env:
        env = AtariWrapper(env, **wrapper_kwargs)
        return env

    return VecFrameStack(make_vec_env_fix(
        env_id,
        n_envs=n_envs,
        seed=seed,
        start_index=start_index,
        monitor_dir=monitor_dir,
        wrapper_class=atari_wrapper,
        env_kwargs=env_kwargs,
        vec_env_cls=vec_env_cls,
        vec_env_kwargs=vec_env_kwargs,
    ),
                         n_stack=4)
Example #20
def atari_games_example():
    # There already exists an environment generator that will make and wrap atari environments correctly.
    # Here we are also multi-worker training (n_envs=4 => 4 environments).
    env = make_atari_env("PongNoFrameskip-v4", n_envs=4, seed=0)
    # Frame-stacking with 4 frames.
    env = VecFrameStack(env, n_stack=4)

    model = A2C("CnnPolicy", env, verbose=1)
    model.learn(total_timesteps=25_000)

    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
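As a hedged follow-up to the example above, the endless render loop could be replaced by an evaluation pass with `evaluate_policy` (the episode count is arbitrary):
from stable_baselines3.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"mean reward: {mean_reward:.1f} +/- {std_reward:.1f}")
env.close()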
Example #21
def test_vec_env_is_wrapped():
    # Test is_wrapped call of subproc workers
    def make_env():
        return CustomGymEnv(gym.spaces.Box(low=np.zeros(2), high=np.ones(2)))

    def make_monitored_env():
        return Monitor(
            CustomGymEnv(gym.spaces.Box(low=np.zeros(2), high=np.ones(2))))

    # One with monitor, one without
    vec_env = SubprocVecEnv([make_env, make_monitored_env])

    assert vec_env.env_is_wrapped(Monitor) == [False, True]

    vec_env.close()

    # One with monitor, one without
    vec_env = DummyVecEnv([make_env, make_monitored_env])

    assert vec_env.env_is_wrapped(Monitor) == [False, True]

    vec_env = VecFrameStack(vec_env, n_stack=2)
    assert vec_env.env_is_wrapped(Monitor) == [False, True]
Example #22
def run_dqn_baseline():
    env = make_atari_env('BreakoutNoFrameskip-v4', n_envs=1, seed=0)
    env = VecFrameStack(env, n_stack=4)
    tensorboard_log = os.path.join(os.path.dirname(__file__), 'runs_baseline')
    buffer_size = 100000
    num_training_steps = 1000000

    model = DQN('CnnPolicy',
                env,
                verbose=0,
                buffer_size=buffer_size,
                learning_starts=50000,
                optimize_memory_usage=False,
                tensorboard_log=tensorboard_log)
    model.learn(total_timesteps=num_training_steps)

    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
Example #23
def record_video(env_id,
                 model,
                 video_length=500,
                 prefix='',
                 video_folder='videos/'):
    """
    :param env_id: (str)
    :param model: (RL model)
    :param video_length: (int)
    :param prefix: (str)
    :param video_folder: (str)
    """
    print("Did you even try?")
    eval_env = make_atari_env(env_id, n_envs=nEnv, seed=0)
    eval_env = VecFrameStack(eval_env, n_stack=4)

    obs = eval_env.reset()
    for _ in range(video_length):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        eval_env.render()
Example #24
def create_test_env(
    env_id: str,
    n_envs: int = 1,
    stats_path: Optional[str] = None,
    seed: int = 0,
    log_dir: Optional[str] = None,
    should_render: bool = True,
    hyperparams: Optional[Dict[str, Any]] = None,
    env_kwargs: Optional[Dict[str, Any]] = None,
) -> VecEnv:
    """
    Create environment for testing a trained agent

    :param env_id:
    :param n_envs: number of processes
    :param stats_path: path to folder containing saved running averages
    :param seed: Seed for random number generator
    :param log_dir: Where to log rewards
    :param should_render: For Pybullet env, display the GUI
    :param hyperparams: Additional hyperparams (ex: n_stack)
    :param env_kwargs: Optional keyword argument to pass to the env constructor
    :return:
    """
    # Avoid circular import
    from utils.exp_manager import ExperimentManager

    # Create the environment and wrap it if necessary
    env_wrapper = get_wrapper_class(hyperparams)

    hyperparams = {} if hyperparams is None else hyperparams

    if "env_wrapper" in hyperparams.keys():
        del hyperparams["env_wrapper"]

    vec_env_kwargs = {}
    vec_env_cls = DummyVecEnv
    if n_envs > 1 or (ExperimentManager.is_bullet(env_id) and should_render):
        # HACK: force SubprocVecEnv for Bullet env
        # as Pybullet envs do not follow the gym.render() interface
        vec_env_cls = SubprocVecEnv
        # start_method = 'spawn' for thread safe

    env = make_vec_env(
        env_id,
        n_envs=n_envs,
        monitor_dir=log_dir,
        seed=seed,
        wrapper_class=env_wrapper,
        env_kwargs=env_kwargs,
        vec_env_cls=vec_env_cls,
        vec_env_kwargs=vec_env_kwargs,
    )

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams["normalize"]:
            print("Loading running average")
            print(f"with params: {hyperparams['normalize_kwargs']}")
            path_ = os.path.join(stats_path, "vecnormalize.pkl")
            if os.path.exists(path_):
                env = VecNormalize.load(path_, env)
                # Deactivate training and reward normalization
                env.training = False
                env.norm_reward = False
            else:
                raise ValueError(f"VecNormalize stats {path_} not found")

        n_stack = hyperparams.get("frame_stack", 0)
        if n_stack > 0:
            print(f"Stacking {n_stack} frames")
            env = VecFrameStack(env, n_stack)
    return env
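A usage sketch for `create_test_env`; the environment id and hyperparameters are placeholders, and the call assumes the surrounding project's `get_wrapper_class` and `ExperimentManager` utilities are importable:
env = create_test_env("BreakoutNoFrameskip-v4",
                      n_envs=1,
                      seed=0,
                      hyperparams={"normalize": False})
obs = env.reset()
env.close()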
Example #25
# Effective code with PPO package
import gym

from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import PPO

env = make_atari_env('Assault-v0', n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=1)

model = PPO('MlpPolicy', env, verbose=1, tensorboard_log='./PPO_log/')
model.learn(total_timesteps=int(3e4))

obs = env.reset()

while True:
    # Recompute the transposed view each step so the prediction input stays current
    obs_ = obs.transpose(3, 0, 1, 2)
    action, _states = model.predict(obs_)
    obs, rewards, dones, info = env.step(action)
    env.render()
Example #26
def create_test_env(env_id,
                    n_envs=1,
                    stats_path=None,
                    seed=0,
                    log_dir='',
                    should_render=True,
                    hyperparams=None,
                    env_kwargs=None):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param stats_path: (str) path to folder containing saved running averages
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams (ex: n_stack)
    :param env_kwargs: (Dict[str, Any]) Optional keyword argument to pass to the env constructor
    :return: (gym.Env)
    """
    # HACK to save logs
    # if log_dir is not None:
    #     os.environ["OPENAI_LOG_FORMAT"] = 'csv'
    #     os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
    #     os.makedirs(log_dir, exist_ok=True)
    #     logger.configure()

    # Create the environment and wrap it if necessary
    env_wrapper = get_wrapper_class(hyperparams)
    hyperparams = {} if hyperparams is None else hyperparams
    if 'env_wrapper' in hyperparams.keys():
        del hyperparams['env_wrapper']

    if n_envs > 1:
        # start_method = 'spawn' for thread safe
        env = SubprocVecEnv([
            make_env(env_id,
                     i,
                     seed,
                     log_dir,
                     wrapper_class=env_wrapper,
                     env_kwargs=env_kwargs) for i in range(n_envs)
        ])
    # Pybullet envs do not follow the gym.render() interface
    elif "Bullet" in env_id:
        # HACK: force SubprocVecEnv for Bullet env
        env = SubprocVecEnv([
            make_env(env_id,
                     0,
                     seed,
                     log_dir,
                     wrapper_class=env_wrapper,
                     env_kwargs=env_kwargs)
        ])
    else:
        env = DummyVecEnv([
            make_env(env_id,
                     0,
                     seed,
                     log_dir,
                     wrapper_class=env_wrapper,
                     env_kwargs=env_kwargs)
        ])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams['normalize']:
            print("Loading running average")
            print("with params: {}".format(hyperparams['normalize_kwargs']))
            stats_file = os.path.join(stats_path, 'vecnormalize.pkl')
            if os.path.exists(stats_file):
                env = VecNormalize.load(stats_file, env)
                # Deactivate training and reward normalization
                env.training = False
                env.norm_reward = False
            else:
                # Legacy: wrap first, then load the saved running average
                env = VecNormalize(env,
                                   training=False,
                                   **hyperparams['normalize_kwargs'])
                env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
Example #27
env = RewardWrapper(env)
env = ResizeWrapper(env, shape=(64, 80, 3))

if custom_params['algo'] == 'dqn':
    env = DiscreteWrapper(env)

if custom_params['USING_VAE']:
    env = NormalizeWrapper(env)  # No need to use normalization if image
    env = FinalLayerObservationWrapper(env, latent_dim=1028, map="map3")

# Step 3.b. Make a vectorized environment so that Normalize or FrameStack can be used (optional)
env = make_vec_env(lambda: env, n_envs=1)
# Step 3.b. Pass through normalization and frame stacking (optional)

env = VecFrameStack(
    env,
    n_stack=custom_params['FRAME_STACK'])  # Use 1 for now because we use image
if not custom_params['USING_VAE']:
    env = VecTransposeImage(env)  # Uncomment if using 3d obs
if custom_params['USING_NORMALIZATION']:
    env = VecNormalize.load(osp.join(results_dir, "vec_normalization.pkl"),
                            env)

# Load the agent
if custom_params['algo'] == 'sac':
    model = SAC.load(osp.join(results_dir, "best_model", "best_model.zip"))
elif custom_params['algo'] == 'a2c':
    model = A2C.load(osp.join(results_dir, "best_model", "best_model.zip"))
elif custom_params['algo'] == 'dqn':
    model = DQN.load(osp.join(results_dir, "best_model", "best_model.zip"))
elif custom_params['algo'] == 'ppo':
    model = PPO.load(osp.join(results_dir, "best_model", "best_model.zip"))
Example #28
# Stack 4 frames
env_id = 'PongNoFrameskip-v4'
video_folder = 'logs/videos/'
video_length = 1000
nEnv = 8
startFresh = False
if startFresh:
    env = make_atari_env(env_id, n_envs=nEnv, seed=0)
    env = VecFrameStack(env, n_stack=4)
    env.reset()
    model = A2C('CnnPolicy', env, verbose=1)
    model.learn(total_timesteps=25000)
    model.save("a2c_pong_{}".format(model.num_timesteps))
    record_video(env_id,
                 model,
                 video_length=500,
                 prefix='ac2_' + env_id,
                 video_folder='videos/')
else:
    env = make_atari_env(env_id, n_envs=nEnv, seed=0)
    env = VecFrameStack(env, n_stack=4)
    env.reset()
    trained_model = A2C.load("a2c_pong_200000", verbose=1)
    trained_model.set_env(env)
Example #29
def run(policy,
        envname,
        learning_rate,
        n_steps,
        epochs,
        gamma,
        gae_lambda,
        ent_coef,
        vf_coef,
        max_grad_norm,
        normalize_advantage,
        policy_kwargs,
        n_eval_episodes,
        eval_freq,
        n_envs,
        n_stack,
        total_timesteps,
        log_interval,
        device="cuda",
        verbose=True,
        tensorboard_log="logs/"):

    # Normalize with multi environments
    seed = np.random.randint(1, 2**16)
    all_args = locals()

    path = "/" + os.path.join(*sb3.__file__.split("/")[:-2])
    commit_num = subprocess.check_output(["git", "describe", "--always"],
                                         cwd=path).strip().decode()

    env = make_atari_env(envname, n_envs=n_envs, seed=seed)
    env = VecFrameStack(env, n_stack=n_stack)

    # Callbacks
    loggercallback = LoggerCallback("json", [("arguments", all_args),
                                             ("git", commit_num)])

    # No seed as the evaluation has no effect on training or pruning
    # Stack frames on the eval env as well so its observations match the model
    eval_env = VecFrameStack(make_atari_env(envname, vec_env_cls=SubprocVecEnv),
                             n_stack=n_stack)
    evalcallback = EvalCallback(eval_env,
                                n_eval_episodes=n_eval_episodes,
                                eval_freq=eval_freq)

    # Initiate the model and start learning
    model = A2C(policy,
                env,
                learning_rate,
                n_steps,
                epochs,
                gamma,
                gae_lambda,
                ent_coef,
                vf_coef,
                max_grad_norm,
                normalize_advantage,
                policy_kwargs,
                verbose=verbose,
                tensorboard_log=tensorboard_log,
                seed=seed,
                device="cuda")

    model.learn(
        total_timesteps=total_timesteps,
        log_interval=log_interval,
        callback=[loggercallback, evalcallback],
        tb_log_name=envname,
    )
    model.env.close()
    evalcallback.eval_env.close()

    return evalcallback.best_mean_reward
Example #30
def env_constructor(n_envs=4):
    env = make_atari_env("MontezumaRevenge-v0", n_envs=n_envs)
    env = VecFrameStack(env, n_stack=4)
    return env
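A closing sketch of how such a constructor is typically consumed; the algorithm choice and step budget below are assumptions, not part of the snippet:
from stable_baselines3 import PPO

env = env_constructor(n_envs=4)
model = PPO("CnnPolicy", env, verbose=1)
model.learn(total_timesteps=10_000)
env.close()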