Example 1
def thunk():
    env = gym.make(gym_id)
    # Standard Atari preprocessing: random no-ops on reset, 4x frame skip
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    env = gym.wrappers.RecordEpisodeStatistics(env)
    # Record videos for the first sub-environment only
    if args.capture_video and idx == 0:
        env = Monitor(env, f"videos/{experiment_name}")
    env = EpisodicLifeEnv(env)
    # Some games require pressing FIRE to start an episode
    if "FIRE" in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env, width=84, height=84)  # grayscale, resize to 84x84
    env = ClipRewardEnv(env)  # clip rewards to their sign
    env.seed(seed)
    env.action_space.seed(seed)
    env.observation_space.seed(seed)
    return env
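Thunks like this are typically called lazily, one per worker, by a vectorized environment constructor. A minimal usage sketch, assuming a hypothetical make_env(gym_id, seed, idx) factory that closes over these variables and returns thunk:

    import gym

    # each thunk is called inside the vector env to build one sub-environment
    envs = gym.vector.SyncVectorEnv(
        [make_env(gym_id, seed + i, idx=i) for i in range(8)]
    )
    obs = envs.reset()  # batched observations, one row per sub-environment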
Example 2
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dmc2gym.make(domain_name=domain, task_name=task)
            env = ClipAction(env)
        elif env_id.startswith("rrc"):
            _, ac_type, ac_wrapper = env_id.split('.')
            ts_relative, sa_relative = False, False
            scaled_ac, task_space = False, False
            if ac_wrapper.split('-')[0] == 'task':
                task_space = True
                ts_relative = ac_wrapper.split('-')[-1] == 'rel'
            elif ac_wrapper.split('-')[0] == 'scaled':
                scaled_ac = True
                sa_relative = ac_wrapper.split('-')[-1] == 'rel'
            env = rrc_utils.build_env_fn(
                    action_type=ac_type, initializer=None, scaled_ac=scaled_ac,
                    task_space=task_space, sa_relative=sa_relative,
                    ts_relative=ts_relative, goal_relative=True,
                    rew_fn='step')()
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)

        env.seed(seed + rank)

        # Flag time-limit terminations so they can be distinguished
        # from true episode ends
        if 'TimeLimit' in env.__class__.__name__:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = Monitor(env,
                          os.path.join(log_dir, str(rank)),
                          allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = EpisodicLifeEnv(env)
                if "FIRE" in env.unwrapped.get_action_meanings():
                    env = FireResetEnv(env)
                env = WarpFrame(env, width=84, height=84)
                env = ClipRewardEnv(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the observation is channels-last (H, W, C) with C in {1, 3},
        # transpose it to channels-first for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
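The "dm." ids used above pack a dm_control domain and task into one string; a quick illustration of the convention (not taken from the snippet):

    import dmc2gym

    env_id = "dm.cheetah.run"
    _, domain, task = env_id.split('.')  # domain == "cheetah", task == "run"
    env = dmc2gym.make(domain_name=domain, task_name=task)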
Example 3
    def _thunk():
        print(f"Using {env_id} environment")
        if env_id == "Warehouse":
            env = Warehouse(parameters)
        elif env_id == 'Sumo':
            # TODO: currently hard-coded to the loop_network scene
            params = {'scene': "loop_network", 'libsumo': True}
            env = LoopNetwork(seed, params)
        else:
            if env_id.startswith("dm"):
                _, domain, task = env_id.split('.')
                env = dmc2gym.make(domain_name=domain, task_name=task)
                env = ClipAction(env)
            else:
                env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)

        env.seed(seed + rank)

        # Flag time-limit terminations so they can be distinguished
        # from true episode ends
        if 'TimeLimit' in env.__class__.__name__:
            env = TimeLimitMask(env)

        if env_id not in ["Warehouse", "Sumo"]:
            if log_dir is not None:
                env = Monitor(env,
                              os.path.join(log_dir, str(rank)),
                              allow_early_resets=allow_early_resets)

            if is_atari:
                if len(env.observation_space.shape) == 3:
                    env = EpisodicLifeEnv(env)
                    if "FIRE" in env.unwrapped.get_action_meanings():
                        env = FireResetEnv(env)
                    env = WarpFrame(env, width=84, height=84)
                    env = ClipRewardEnv(env)
            elif len(env.observation_space.shape) == 3:
                raise NotImplementedError(
                    "CNN models work only for atari,\n"
                    "please use a custom wrapper for a custom pixel input env.\n"
                    "See wrap_deepmind for an example.")

        # If the observation is channels-last (H, W, C) with C in {1, 3},
        # transpose it to channels-first for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Example 4
    def _thunk():
        # env_id may be a ready-made constructor instead of a string id
        if callable(env_id):
            env = env_id(**kwargs)

        elif env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dmc2gym.make(domain_name=domain, task_name=task)
            env = ClipAction(env)

        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)

        env.seed(seed + rank)

        # Flag time-limit terminations so they can be distinguished
        # from true episode ends
        if 'TimeLimit' in env.__class__.__name__:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = Monitor(env,
                          os.path.join(log_dir, str(rank)),
                          allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = EpisodicLifeEnv(env)
                if "FIRE" in env.unwrapped.get_action_meanings():
                    env = FireResetEnv(env)
                env = WarpFrame(env, width=84, height=84)
                env = ClipRewardEnv(env)
        elif env.observation_space.shape and len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the observation is channels-last (H, W, C) with C in {1, 3},
        # transpose it to channels-first for PyTorch convolutions
        if env.observation_space.shape:
            obs_shape = env.observation_space.shape
            if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
                env = TransposeImage(env, op=[2, 0, 1])

        return env
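The first branch of this example accepts a constructor instead of a string id, which is convenient for custom environments. A sketch, where make_env stands in for the outer factory (not shown in the snippet) that builds _thunk:

    import gym

    # any keyword-argument constructor can serve as env_id here
    env_fn = lambda **kwargs: gym.make("CartPole-v1")
    thunk = make_env(env_fn, seed=0, rank=0, log_dir=None, allow_early_resets=True)
    env = thunk()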
Example 5
def thunk():
    env = gym.make(gym_id)
    env = gym.wrappers.RecordEpisodeStatistics(env)
    # Record videos for the first sub-environment only
    if capture_video and idx == 0:
        env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    env = EpisodicLifeEnv(env)
    if "FIRE" in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = ClipRewardEnv(env)
    # Resize, grayscale, and stack 4 frames for the CNN input
    env = gym.wrappers.ResizeObservation(env, (84, 84))
    env = gym.wrappers.GrayScaleObservation(env)
    env = gym.wrappers.FrameStack(env, 4)
    env.seed(seed)
    env.action_space.seed(seed)
    env.observation_space.seed(seed)
    return env
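With this wrapper order the agent receives four stacked 84x84 grayscale frames; a quick sanity check, assuming the factory's closure variables are in scope:

    env = thunk()
    print(env.observation_space.shape)  # expected: (4, 84, 84)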
Example 6
    def __init__(
        self,
        env: gym.Env,
        noop_max: int = 30,
        frame_skip: int = 4,
        screen_sizeH: int = 116,
        screen_sizeW: int = 124,
        terminal_on_life_loss: bool = True,
        clip_reward: bool = True,
    ):
        # Chain the standard Atari preprocessing before initializing the parent wrapper
        env = NoopResetEnv(env, noop_max=noop_max)
        env = MaxAndSkipEnv(env, skip=frame_skip)
        if terminal_on_life_loss:
            env = SMEpisodicLifeEnv(env)
        env = WarpFrame(env, width=screen_sizeW, height=screen_sizeH)
        env = RewardWraper(env)
        if clip_reward:
            env = ClipRewardEnv(env)

        super().__init__(env)
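A usage sketch; the wrapper's class name is not shown in the snippet, so AtariStack below is a hypothetical stand-in:

    import gym

    env = gym.make("PongNoFrameskip-v4")
    env = AtariStack(env, screen_sizeH=116, screen_sizeW=124)  # hypothetical name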
Example 7
def make_env_all_params(rank, args):
    """Initialize the environment and apply wrappers.

    Parameters
    ----------
    rank :
        Rank of the environment.
    args :
        Hyperparameters for this run.

    Returns
    -------
    env
        Environment with its individual wrappers.

    """
    if args.env_kind == "atari":
        from stable_baselines3.common.atari_wrappers import NoopResetEnv

        from nupic.embodied.disagreement.envs.wrappers import (
            AddRandomStateToInfo,
            ExtraTimeLimit,
            FrameStack,
            MaxAndSkipEnv,
            MontezumaInfoWrapper,
            ProcessFrame84,
            StickyActionEnv,
        )
        env = gym.make(args.env)
        assert "NoFrameskip" in env.spec.id
        if args.stickyAtari:
            # Sticky actions: the previous action is repeated with some
            # probability; the time limit is set in raw frames here
            # (before the 4x frame skip applied below)
            env._max_episode_steps = args.max_episode_steps * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args.noop_max)
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        if not args.stickyAtari:
            env = ExtraTimeLimit(env, args.max_episode_steps)
        if "Montezuma" in args.env:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args.env_kind == "mario":
        from nupic.embodied.disagreement.envs.wrappers import make_mario_env
        env = make_mario_env()
    elif args.env_kind == "retro_multi":
        from nupic.embodied.disagreement.envs.wrappers import make_multi_pong
        env = make_multi_pong()
    elif args.env_kind == "roboarm":
        from real_robots.envs import REALRobotEnv

        from nupic.embodied.disagreement.envs.wrappers import CartesianControlDiscrete
        env = REALRobotEnv(objects=3, action_type="cartesian")
        env = CartesianControlDiscrete(
            env,
            crop_obs=args.crop_obs,
            repeat=args.act_repeat,
            touch_reward=args.touch_reward,
            random_force=args.random_force,
        )
        if args.resize_obs > 0:
            env = ResizeObservation(env, args.resize_obs)

    print("adding monitor")
    env = Monitor(env, filename=None)
    return env
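A call sketch, assuming an argparse-style args object carrying the fields read above (values here are illustrative):

    import argparse

    args = argparse.Namespace(
        env_kind="atari",
        env="BreakoutNoFrameskip-v4",
        stickyAtari=False,
        noop_max=30,
        max_episode_steps=4500,
    )
    env = make_env_all_params(rank=0, args=args)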