def make_vec_env(n_envs):
    """Build a ``SubprocVecEnv`` from ``n_envs`` results of ``make_env()``.

    :param n_envs: how many times ``make_env()`` is called; each result becomes one
        entry of the list handed to ``SubprocVecEnv``
    :return: the constructed ``SubprocVecEnv`` instance
    """
    # presumably make_env() returns an environment factory (thunk) -- confirm against its definition
    env_fns = []
    for _ in range(n_envs):
        env_fns.append(make_env())
    return SubprocVecEnv(env_fns)
def init_env(env_type, env_name, env_args, env_num,
             add_prev_achieved_goal=False, time_unlimited=False, die_penalty=0,
             relax_discrete=False, action_repeat=1, frame_stack=1,
             image_args=None, custom_wrapper_path=None, custom_wrapper_args=None,
             flatten_observation=False):
    """Create a single wrapped environment or a ``SubprocVecEnv`` of them.

    WARNING! Wrapper order __is__ important and __must__ be set up carefully!
    Wrappers work like a queue: first applied - first executed.

    :param env_type: 'gym', 'retro', or a dotted module path in the form
        'folder.sub_folder.file', str
    :param env_name: gym id, retro game name, or the attribute name looked up
        in the module imported from ``env_type``
    :param env_args: dict of keyword arguments for the environment maker;
        it is copied, never modified. ``None`` is treated as an empty dict
    :param env_num: number of environments working in parallel
    :param add_prev_achieved_goal: if True, apply ``last_achieved_goal_wrapper``;
        useful for goal-augmented environments and hindsight
    :param time_unlimited: if True, replace the environment by its ``.env`` attribute
    :param die_penalty: when non-zero, passed to ``die_penalty_wrapper``
    :param relax_discrete: if True, apply ``one_hot_wrapper``
    :param action_repeat: passed to ``action_repeat_wrapper``
    :param frame_stack: number of frames passed to ``frame_stack_wrapper``;
        for 3-D observation spaces a value of 1 is replaced by 4
    :param image_args: dict with keys:
        convert_to_gray, x_start, x_end, y_start, y_end, x_size, y_size.
        ``None`` means no extra keyword arguments for ``image_wrapper``
    :param custom_wrapper_path: when not None, passed to ``custom_wrapper``
    :param custom_wrapper_args: passed to ``custom_wrapper`` with the path
    :param flatten_observation: if True then multi-part observation will be flattened,
        useful for goal-augmented environments
    :return: one environment instance when ``env_num == 1``,
        a ``SubprocVecEnv`` when ``env_num > 1``
    :raises ValueError: if ``env_num`` is less than 1
    """
    def _init_env():
        # Copy so that injecting 'id' / 'game' never leaks into the caller's dict.
        maker_kwargs = dict(env_args) if env_args is not None else {}
        if env_type == 'gym':
            maker = gym.make
            maker_kwargs['id'] = env_name
        elif env_type == 'retro':
            maker = retro.make
            maker_kwargs['game'] = env_name
        else:
            module = importlib.import_module(env_type)
            maker = getattr(module, env_name)
        _env = maker(**maker_kwargs)

        if flatten_observation:
            # noinspection PyUnresolvedReferences
            _env = gym.wrappers.FlattenObservation(_env)
        if custom_wrapper_path is not None:
            _env = custom_wrapper(_env, custom_wrapper_path, custom_wrapper_args)
        if add_prev_achieved_goal:
            _env = last_achieved_goal_wrapper(_env)
        if time_unlimited:
            # NOTE(review): this takes `.env` of whatever object is outermost at this
            # point. If any of the optional wrappers above was applied, that is the
            # object being unwrapped, not the one the maker returned -- confirm intent.
            _env = _env.env
        if die_penalty != 0:
            _env = die_penalty_wrapper(_env, die_penalty)
        if isinstance(_env.action_space, gym.spaces.Box):
            _env = continuous_action_wrapper(_env)  # normalize actions to [-1, +1]
        if relax_discrete:
            _env = one_hot_wrapper(_env)

        # this wrapper is useful even if action_repeat=1, because it supports rendering
        _env = action_repeat_wrapper(_env, action_repeat)

        obs_shape = _env.observation_space.shape
        if obs_shape is not None:
            if len(obs_shape) == 3:
                # this is common for image envs
                # image_args defaults to None, which cannot be unpacked with **.
                _env = image_wrapper(_env, **(image_args or {}))
                _env = frame_stack_wrapper(_env, 4 if frame_stack == 1 else frame_stack)
            elif frame_stack != 1:
                _env = frame_stack_wrapper(_env, frame_stack)
        return _env

    if env_num > 1:
        env = SubprocVecEnv([_init_env for _ in range(env_num)])
    elif env_num == 1:
        env = _init_env()
    else:
        raise ValueError(f'env_num should be >= 1, got env_num={env_num}')
    return env
def make_vec_env(n_envs, train):
    """Build a ``SubprocVecEnv`` from ``n_envs`` results of ``make_env``.

    :param n_envs: number of ``make_env`` calls; also passed as the second argument
        of every call
    :param train: when truthy, the i-th call receives ``i`` as its first argument;
        otherwise every call receives ``0``
    :return: the constructed ``SubprocVecEnv`` instance
    """
    env_fns = []
    for idx in range(n_envs):
        first_arg = idx if train else 0
        env_fns.append(make_env(first_arg, n_envs))
    return SubprocVecEnv(env_fns)
def init_env(env_type, env_name, env_args, env_num,
             time_unlimited=False, die_penalty=0, relax_discrete=False,
             action_repeat=1, image_args=None,
             custom_wrapper_path=None, custom_wrapper_args=None):
    """Create a single wrapped environment or a ``SubprocVecEnv`` of them.

    WARNING! Wrapper order __is__ important and __must__ be set up carefully!
    Wrappers work like a queue: first applied - first executed.

    :param env_type: 'gym', 'retro', or a dotted module path in the form
        'folder.sub_folder.file', str
    :param env_name: gym id, retro game name, or the attribute name looked up
        in the module imported from ``env_type``
    :param env_args: dict of keyword arguments for the environment maker;
        it is copied, never modified. ``None`` is treated as an empty dict
    :param env_num: number of environments working in parallel
    :param time_unlimited: if True, replace the environment by its ``.env`` attribute
    :param die_penalty: when non-zero, passed to ``DiePenaltyWrapper``
    :param relax_discrete: if True, apply ``OneHotWrapper``
    :param action_repeat: passed to ``ActionRepeatWrapper``
    :param image_args: dict with keys:
        convert_to_gray, x_start, x_end, y_start, y_end, x_size, y_size.
        ``None`` means no extra keyword arguments for ``ImageEnvWrapper``
    :param custom_wrapper_path: when not None, passed to ``CustomWrapper``
    :param custom_wrapper_args: passed to ``CustomWrapper`` with the path
    :return: one environment instance when ``env_num == 1``,
        a ``SubprocVecEnv`` when ``env_num > 1``
    :raises ValueError: if ``env_num`` is less than 1
    """
    def _init_env():
        # Copy so that injecting 'id' / 'game' never leaks into the caller's dict.
        maker_kwargs = dict(env_args) if env_args is not None else {}
        if env_type == 'gym':
            maker = gym.make
            # gym.make takes the environment id as `id`; an `env_name` keyword would
            # leave the required `id` argument missing.
            maker_kwargs['id'] = env_name
        elif env_type == 'retro':
            maker = retro.make
            maker_kwargs['game'] = env_name
        else:
            module = importlib.import_module(env_type)
            maker = getattr(module, env_name)
        _env = maker(**maker_kwargs)

        if custom_wrapper_path is not None:
            _env = CustomWrapper(_env, custom_wrapper_path, custom_wrapper_args)
        if time_unlimited:
            # NOTE(review): this takes `.env` of whatever object is outermost at this
            # point. If CustomWrapper was applied above, that is the object being
            # unwrapped, not the one the maker returned -- confirm intent.
            _env = _env.env
        if die_penalty != 0:
            _env = DiePenaltyWrapper(_env, die_penalty)
        if isinstance(_env.action_space, gym.spaces.Box):
            _env = ContinuousActionWrapper(_env)  # normalize actions to [-1, +1]
        if relax_discrete:
            _env = OneHotWrapper(_env)

        # this wrapper is useful even if action_repeat=1, because it supports rendering
        _env = ActionRepeatWrapper(_env, action_repeat)

        # this is common for image envs
        # The shape can be None for some observation spaces, so check before len().
        obs_shape = _env.observation_space.shape
        if obs_shape is not None and len(obs_shape) == 3:
            # image_args defaults to None, which cannot be unpacked with **.
            _env = ImageEnvWrapper(_env, **(image_args or {}))
            _env = FrameStackWrapper(_env, 4)
        return _env

    if env_num > 1:
        env = SubprocVecEnv([_init_env for _ in range(env_num)])
    elif env_num == 1:
        env = _init_env()
    else:
        raise ValueError(f'env_num should be >= 1, got env_num={env_num}')
    return env
self._max_episode_steps = 20 def reset(self): self.state = 0 def step(self, *args, **kwargs): self.state += 1 reward = self.state done_ = False info_ = {} return self.state, reward, done_, info_ def init_env(): env = gym.make(env_name) env = RolloutPadWrapper(env, rollout_len) return env if __name__ == '__main__': env_name = 'CartPole-v1' rollout_len = 10 vec_env = SubprocVecEnv([init_env for _ in range(2)]) vec_env.reset() for i in range(100): action = [0] * 5 _, _, done, info = vec_env.step(action) print(i, done, info) vec_env.close()