def thunk():
    env = gym.make(gym_id)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    env = gym.wrappers.RecordEpisodeStatistics(env)
    if args.capture_video:
        # Only record video from the first environment in the vectorized set.
        if idx == 0:
            env = Monitor(env, f"videos/{experiment_name}")
    env = EpisodicLifeEnv(env)
    if "FIRE" in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env, width=84, height=84)
    env = ClipRewardEnv(env)
    env.seed(seed)
    env.action_space.seed(seed)
    env.observation_space.seed(seed)
    return env
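# A minimal usage sketch for the thunk pattern above, assuming it is returned by
# an enclosing factory (hypothetically make_env(gym_id, seed, idx)) that closes
# over gym_id, seed, idx, args, and experiment_name; the wrapper stack and env
# id are illustrative and elided to keep the sketch self-contained.
import gym

def make_env(gym_id, seed, idx):
    def thunk():
        env = gym.make(gym_id)
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    return thunk

# The thunks are passed unevaluated so each vectorized worker builds its own env:
envs = gym.vector.SyncVectorEnv(
    [make_env("BreakoutNoFrameskip-v4", seed=1 + i, idx=i) for i in range(4)]
)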
def _thunk(): if env_id.startswith("dm"): _, domain, task = env_id.split('.') env = dmc2gym.make(domain_name=domain, task_name=task) env = ClipAction(env) elif env_id.startswith("rrc"): _, ac_type, ac_wrapper = env_id.split('.') ts_relative, sa_relative = False, False scaled_ac, task_space = False, False if ac_wrapper.split('-')[0] == 'task': task_space = True ts_relative = ac_wrapper.split('-')[-1] == 'rel' elif ac_wrapper.split('-')[0] == 'scaled': scaled_ac = True sa_relative = ac_wrapper.split('-')[-1] == 'rel' env = rrc_utils.build_env_fn( action_type=ac_type, initializer=None, scaled_ac=scaled_ac, task_space=task_space, sa_relative=sa_relative, ts_relative=ts_relative, goal_relative=True, rew_fn='step')() else: env = gym.make(env_id) is_atari = hasattr(gym.envs, 'atari') and isinstance( env.unwrapped, gym.envs.atari.atari_env.AtariEnv) if is_atari: env = NoopResetEnv(env, noop_max=30) env = MaxAndSkipEnv(env, skip=4) env.seed(seed + rank) if str(env.__class__.__name__).find('TimeLimit') >= 0: env = TimeLimitMask(env) if log_dir is not None: env = Monitor(env, os.path.join(log_dir, str(rank)), allow_early_resets=allow_early_resets) if is_atari: if len(env.observation_space.shape) == 3: env = EpisodicLifeEnv(env) if "FIRE" in env.unwrapped.get_action_meanings(): env = FireResetEnv(env) env = WarpFrame(env, width=84, height=84) env = ClipRewardEnv(env) elif len(env.observation_space.shape) == 3: raise NotImplementedError( "CNN models work only for atari,\n" "please use a custom wrapper for a custom pixel input env.\n" "See wrap_deepmind for an example.") # If the input has shape (W,H,3), wrap for PyTorch convolutions obs_shape = env.observation_space.shape if len(obs_shape) == 3 and obs_shape[2] in [1, 3]: env = TransposeImage(env, op=[2, 0, 1]) return env
def _thunk():
    print(f"Using {env_id} environment")
    if env_id == "Warehouse":
        env = Warehouse(parameters)
    elif env_id == 'Sumo':
        # TODO: currently just using the loop_network scene
        params = {'scene': "loop_network", 'libsumo': True}
        env = LoopNetwork(seed, params)
    elif env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dmc2gym.make(domain_name=domain, task_name=task)
        env = ClipAction(env)
    else:
        env = gym.make(env_id)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=4)

    env.seed(seed + rank)

    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)

    # The custom Warehouse/Sumo environments skip the gym-specific wrappers below.
    if env_id not in ["Warehouse", "Sumo"]:
        if log_dir is not None:
            env = Monitor(env,
                          os.path.join(log_dir, str(rank)),
                          allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = EpisodicLifeEnv(env)
                if "FIRE" in env.unwrapped.get_action_meanings():
                    env = FireResetEnv(env)
                env = WarpFrame(env, width=84, height=84)
                env = ClipRewardEnv(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

    return env
def _thunk():
    # env_id may be a constructor/factory instead of a gym id string.
    if isinstance(env_id, Callable):
        env = env_id(**kwargs)
    elif env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dmc2gym.make(domain_name=domain, task_name=task)
        env = ClipAction(env)
    else:
        env = gym.make(env_id)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=4)

    env.seed(seed + rank)

    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)

    if log_dir is not None:
        env = Monitor(env,
                      os.path.join(log_dir, str(rank)),
                      allow_early_resets=allow_early_resets)

    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = EpisodicLifeEnv(env)
            if "FIRE" in env.unwrapped.get_action_meanings():
                env = FireResetEnv(env)
            env = WarpFrame(env, width=84, height=84)
            env = ClipRewardEnv(env)
    elif env.observation_space.shape and len(env.observation_space.shape) == 3:
        raise NotImplementedError(
            "CNN models work only for atari,\n"
            "please use a custom wrapper for a custom pixel input env.\n"
            "See wrap_deepmind for an example.")

    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    if env.observation_space.shape:
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

    return env
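# A sketch of the Callable branch above: env_id can be an env class or factory,
# with **kwargs forwarded from the enclosing make-env factory. MyVectorEnv and
# the factory signature are assumptions for illustration only.
import gym
import numpy as np
from gym import spaces

class MyVectorObsEnv(gym.Env):
    def __init__(self, difficulty="easy"):
        super().__init__()
        self.difficulty = difficulty
        self.observation_space = spaces.Box(-1.0, 1.0, (8,), dtype=np.float32)
        self.action_space = spaces.Discrete(4)

# thunk = make_env(MyVectorObsEnv, seed=0, rank=0, log_dir=None,
#                  allow_early_resets=True, difficulty="hard")
# env = thunk()  # reaches env = env_id(**kwargs) -> MyVectorObsEnv(difficulty="hard")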
def thunk():
    env = gym.make(gym_id)
    env = gym.wrappers.RecordEpisodeStatistics(env)
    if capture_video:
        # Only record video from the first environment in the vectorized set.
        if idx == 0:
            env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    env = EpisodicLifeEnv(env)
    if "FIRE" in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = ClipRewardEnv(env)
    env = gym.wrappers.ResizeObservation(env, (84, 84))
    env = gym.wrappers.GrayScaleObservation(env)
    env = gym.wrappers.FrameStack(env, 4)
    env.seed(seed)
    env.action_space.seed(seed)
    env.observation_space.seed(seed)
    return env
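# Shape check for the observation pipeline above: ResizeObservation yields
# (84, 84, 3) for an RGB Atari frame, GrayScaleObservation (with its default
# keep_dim=False) yields (84, 84), and FrameStack(env, 4) then yields LazyFrames
# of shape (4, 84, 84).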
def __init__(
    self,
    env: gym.Env,
    noop_max: int = 30,
    frame_skip: int = 4,
    screen_sizeH: int = 116,
    screen_sizeW: int = 124,
    terminal_on_life_loss: bool = True,
    clip_reward: bool = True,
):
    env = NoopResetEnv(env, noop_max=noop_max)
    env = MaxAndSkipEnv(env, skip=frame_skip)
    if terminal_on_life_loss:
        env = SMEpisodicLifeEnv(env)
    env = WarpFrame(env, width=screen_sizeW, height=screen_sizeH)
    env = RewardWraper(env)
    if clip_reward:
        env = ClipRewardEnv(env)
    super().__init__(env)
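# Usage sketch for the composite wrapper above, assuming the __init__ belongs to
# a gym.Wrapper subclass; the class name and env id below are illustrative only:
#
#     env = MarioPreprocessing(gym.make("SuperMarioBros-v0"),
#                              screen_sizeH=116, screen_sizeW=124,
#                              terminal_on_life_loss=True, clip_reward=True)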
def make_env_all_params(rank, args):
    """Initialize the environment and apply wrappers.

    Parameters
    ----------
    rank :
        Rank of the environment.
    args :
        Hyperparameters for this run.

    Returns
    -------
    env
        Environment with its individual wrappers.
    """
    if args.env_kind == "atari":
        from stable_baselines3.common.atari_wrappers import NoopResetEnv

        from nupic.embodied.disagreement.envs.wrappers import (
            AddRandomStateToInfo,
            ExtraTimeLimit,
            FrameStack,
            MaxAndSkipEnv,
            MontezumaInfoWrapper,
            ProcessFrame84,
            StickyActionEnv,
        )

        env = gym.make(args.env)
        assert "NoFrameskip" in env.spec.id
        if args.stickyAtari:
            env._max_episode_steps = args.max_episode_steps * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args.noop_max)
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        if not args.stickyAtari:
            env = ExtraTimeLimit(env, args.max_episode_steps)
        if "Montezuma" in args.env:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args.env_kind == "mario":
        from nupic.embodied.disagreement.envs.wrappers import make_mario_env
        env = make_mario_env()
    elif args.env_kind == "retro_multi":
        from nupic.embodied.disagreement.envs.wrappers import make_multi_pong
        env = make_multi_pong()
    elif args.env_kind == "roboarm":
        from real_robots.envs import REALRobotEnv

        from nupic.embodied.disagreement.envs.wrappers import CartesianControlDiscrete

        env = REALRobotEnv(objects=3, action_type="cartesian")
        env = CartesianControlDiscrete(
            env,
            crop_obs=args.crop_obs,
            repeat=args.act_repeat,
            touch_reward=args.touch_reward,
            random_force=args.random_force,
        )

    if args.resize_obs > 0:
        env = ResizeObservation(env, args.resize_obs)
    print("adding monitor")
    env = Monitor(env, filename=None)
    return env
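# A sketch of fanning the factory above out across parallel workers with
# functools.partial; the worker count and the args object are assumptions
# supplied by the surrounding training script:
from functools import partial

env_fns = [partial(make_env_all_params, rank=i, args=args) for i in range(8)]
# Each entry is a zero-argument callable suitable for a subprocess-based vec env.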