class GymEnvironment(Environment):
    def __init__(self,
                 level: LevelSelection,
                 frame_skip: int,
                 visualization_parameters: VisualizationParameters,
                 target_success_rate: float = 1.0,
                 additional_simulator_parameters: Dict[str, Any] = {},
                 seed: Union[None, int] = None,
                 human_control: bool = False,
                 custom_reward_threshold: Union[int, float] = None,
                 random_initialization_steps: int = 1,
                 max_over_num_frames: int = 1,
                 observation_space_type: ObservationSpaceType = None,
                 **kwargs):
        """
        :param level: (str) A string representing the gym level to run. This can also be a LevelSelection object.
                      For example, BreakoutDeterministic-v0
        :param frame_skip: (int) The number of frames to skip between any two actions given by the agent. The action
                           will be repeated for all the skipped frames.
        :param visualization_parameters: (VisualizationParameters) The parameters used for visualizing the
                                         environment, such as the render flag, storing videos etc.
        :param additional_simulator_parameters: (Dict[str, Any]) Any additional parameters that the user can pass to
                                                the Gym environment. These parameters should be accepted by the
                                                __init__ function of the implemented Gym environment.
        :param seed: (int) A seed to use for the random number generator when running the environment.
        :param human_control: (bool) A flag that allows controlling the environment using the keyboard keys.
        :param custom_reward_threshold: (float) Allows defining a custom reward that will be used to decide when the
                                        agent succeeded in passing the environment. If not set, this value will be
                                        taken from the Gym environment definition.
        :param random_initialization_steps: (int) The number of random steps that will be taken in the environment
                                            after each reset. This is a feature presented in the DQN paper, which
                                            improves the variability of the episodes the agent sees.
        :param max_over_num_frames: (int) This value will be used for merging multiple frames into a single frame by
                                    taking the maximum value for each of the pixels in the frame. This is particularly
                                    useful in Atari games, where the frames flicker, and objects can be seen in one
                                    frame but disappear in the next.
        :param observation_space_type: This value will be used for generating the observation space. Allows a custom
                                       space. Should be one of ObservationSpaceType. If not specified, the observation
                                       space is inferred from the number of dimensions of the observation:
                                       1D: Vector space, 3D: Image space if 1 or 3 channels, PlanarMaps space
                                       otherwise.
        """
        super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold,
                         visualization_parameters, target_success_rate)

        self.random_initialization_steps = random_initialization_steps
        self.max_over_num_frames = max_over_num_frames
        self.additional_simulator_parameters = additional_simulator_parameters

        # hide warnings
        gym.logger.set_level(40)

        """
        load and initialize environment
        environment ids can be defined in 3 ways:
        1. Native gym environments like BreakoutDeterministic-v0 for example
        2. Custom gym environments written and installed as python packages.
           These environments should have a python module with a class inheriting gym.Env, implementing the relevant
           functions (_reset, _step, _render) and defining the observation and action space.
           For example: my_environment_package:MyEnvironmentClass will run an environment defined in the
           MyEnvironmentClass class
        3. Custom gym environments written as an independent module which is not installed.
           These environments should have a python module with a class inheriting gym.Env, implementing the relevant
           functions (_reset, _step, _render) and defining the observation and action space.
           For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an environment
           defined in the MyEnvironmentClass class, which is located in the module in the relative path
           path_to_my_environment.sub_directory.my_module
        """
        if ':' in self.env_id:
            # custom environments
            if '/' in self.env_id or '.' in self.env_id:
                # environment in an absolute path module written as a unix path, or in a relative path module
                # written as a python import path
                env_class = short_dynamic_import(self.env_id)
            else:
                # environment in a python package
                env_class = gym.envs.registration.load(self.env_id)

            # instantiate the environment
            try:
                self.env = env_class(**self.additional_simulator_parameters)
            except:
                screen.error("Failed to instantiate Gym environment class %s with arguments %s" %
                             (env_class, self.additional_simulator_parameters), crash=False)
                raise
        else:
            self.env = gym.make(self.env_id)

        # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows
        environment_to_always_use_with_native_rendering = ['classic_control', 'mujoco', 'robotics']
        self.native_rendering = self.native_rendering or \
            any([env in str(self.env.unwrapped.__class__)
                 for env in environment_to_always_use_with_native_rendering])
        if self.native_rendering:
            if hasattr(self, 'renderer'):
                self.renderer.close()

        # seed
        if self.seed is not None:
            self.env.seed(self.seed)
            np.random.seed(self.seed)
            random.seed(self.seed)

        # frame skip and max between consecutive frames
        self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__)
        self.is_roboschool_env = 'roboschool' in str(self.env.unwrapped.__class__)
        self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__)
        if self.is_atari_env:
            # this accesses the atari env that is wrapped with a timelimit wrapper env
            self.env.unwrapped.frameskip = 1
            if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4:
                screen.warning(
                    "Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. "
                    "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the "
                    "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4."
                )
                self.frame_skip = 3
            self.env = MaxOverFramesAndFrameskipEnvWrapper(self.env,
                                                           frameskip=self.frame_skip,
                                                           max_over_num_frames=self.max_over_num_frames)
        else:
            self.env.unwrapped.frameskip = self.frame_skip

        self.state_space = StateSpace({})

        # observations
        if not isinstance(self.env.observation_space, gym.spaces.dict.Dict):
            state_space = {'observation': self.env.observation_space}
        else:
            state_space = self.env.observation_space.spaces

        for observation_space_name, observation_space in state_space.items():
            if observation_space_type == ObservationSpaceType.Tensor:
                # we consider an arbitrary input tensor which does not necessarily represent images
                self.state_space[observation_space_name] = TensorObservationSpace(
                    shape=np.array(observation_space.shape),
                    low=observation_space.low,
                    high=observation_space.high)
            elif observation_space_type == ObservationSpaceType.Image or len(observation_space.shape) == 3:
                # we assume gym has image observations (with an arbitrary number of channels) where their values are
                # within 0-255, and where the channel dimension is the last dimension
                if observation_space.shape[-1] in [1, 3]:
                    self.state_space[observation_space_name] = ImageObservationSpace(
                        shape=np.array(observation_space.shape),
                        high=255,
                        channels_axis=-1)
                else:
                    # for any number of channels other than 1 or 3, use the generic PlanarMaps space
                    self.state_space[observation_space_name] = PlanarMapsObservationSpace(
                        shape=np.array(observation_space.shape),
                        low=0,
                        high=255,
                        channels_axis=-1)
            elif observation_space_type == ObservationSpaceType.Vector or len(observation_space.shape) == 1:
                self.state_space[observation_space_name] = VectorObservationSpace(
                    shape=observation_space.shape[0],
                    low=observation_space.low,
                    high=observation_space.high)
            else:
                screen.error("Failed to instantiate Gym environment %s with observation space type %s" %
                             (self.env_id, observation_space_type), crash=True)

        if 'desired_goal' in state_space.keys():
            self.goal_space = self.state_space['desired_goal']

        # actions
        if type(self.env.action_space) == gym.spaces.box.Box:
            self.action_space = BoxActionSpace(
                shape=self.env.action_space.shape,
                low=self.env.action_space.low,
                high=self.env.action_space.high)
        elif type(self.env.action_space) == gym.spaces.discrete.Discrete:
            actions_description = []
            if hasattr(self.env.unwrapped, 'get_action_meanings'):
                actions_description = self.env.unwrapped.get_action_meanings()
            self.action_space = DiscreteActionSpace(
                num_actions=self.env.action_space.n,
                descriptions=actions_description)
        else:
            screen.error("Failed to instantiate gym environment {} due to unsupported action space {}. "
                         "Expected BoxActionSpace or DiscreteActionSpace."
                         .format(self.env_id, self.env.action_space), crash=True)

        if self.human_control:
            # TODO: add this to the action space
            # map keyboard keys to actions
            self.key_to_action = {}
            if hasattr(self.env.unwrapped, 'get_keys_to_action'):
                self.key_to_action = self.env.unwrapped.get_keys_to_action()
            else:
                screen.error("Error: Environment {} does not support human control.".format(self.env),
                             crash=True)
        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale, image.shape[0] * scale)

        # the info is only updated after the first step
        self.state = self.step(self.action_space.default_action).next_state
        self.state_space['measurements'] = VectorObservationSpace(shape=len(self.info.keys()))

        if self.env.spec and custom_reward_threshold is None:
            self.reward_success_threshold = self.env.spec.reward_threshold
            self.reward_space = RewardSpace(1, reward_success_threshold=self.reward_success_threshold)

        self.target_success_rate = target_success_rate

    def _wrap_state(self, state):
        if not isinstance(self.env.observation_space, gym.spaces.Dict):
            return {'observation': state}
        return state

    def _update_state(self):
        if self.is_atari_env and hasattr(self, 'current_ale_lives') \
                and self.current_ale_lives != self.env.unwrapped.ale.lives():
            if self.phase == RunPhase.TRAIN or self.phase == RunPhase.HEATUP:
                # signal termination for life loss
                self.done = True
            elif self.phase == RunPhase.TEST and not self.done:
                # the episode is not terminated in evaluation, but we need to press fire again
                self._press_fire()
            self._update_ale_lives()
        # TODO: update the measurements
        if self.state and "desired_goal" in self.state.keys():
            self.goal = self.state['desired_goal']

    def _take_action(self, action):
        if type(self.action_space) == BoxActionSpace:
            action = self.action_space.clip_action_to_space(action)

        self.state, self.reward, self.done, self.info = self.env.step(action)
        self.state = self._wrap_state(self.state)

    def _random_noop(self):
        # simulate a random initial environment state by stepping for a random number of times
        # between 0 and random_initialization_steps
        step_count = 0
        random_initialization_steps = random.randint(0, self.random_initialization_steps)
        while self.action_space is not None and (self.state is None or step_count < random_initialization_steps):
            step_count += 1
            self.step(self.action_space.default_action)

    def _press_fire(self):
        fire_action = 1
        if self.is_atari_env and self.env.unwrapped.get_action_meanings()[fire_action] == 'FIRE':
            self.current_ale_lives = self.env.unwrapped.ale.lives()
            self.step(fire_action)
            if self.done:
                self.reset_internal_state()

    def _update_ale_lives(self):
        if self.is_atari_env:
            self.current_ale_lives = self.env.unwrapped.ale.lives()

    def _restart_environment_episode(self, force_environment_reset=False):
        # prevent resetting the environment if there are ale lives left
        if (self.is_atari_env and self.env.unwrapped.ale.lives() > 0) \
                and not force_environment_reset:
            self.step(self.action_space.default_action)
        else:
            self.state = self.env.reset()
            self.state = self._wrap_state(self.state)
            self._update_ale_lives()

        if self.is_atari_env:
            self._random_noop()
            self._press_fire()

        # initialize the number of lives
        self._update_ale_lives()

    def _render(self):
        self.env.render(mode='human')

    def get_rendered_image(self):
        image = self.env.render(mode='rgb_array')
        return image

    def get_target_success_rate(self) -> float:
        return self.target_success_rate

    def close(self) -> None:
        """
        Clean up to close rendering windows.
        :return: None
        """
        self.env.close()
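# A minimal sketch (not part of the class above) of the kind of custom gym environment described in the loader
# comment in __init__: a module exposing a gym.Env subclass that can be referenced with an environment id such as
# my_environment_package:MyEnvironmentClass. The class name, spaces and dynamics here are illustrative assumptions;
# depending on the gym version in use, the hooks may need to be the underscore-prefixed _reset/_step/_render
# variants mentioned in that comment.
class MyEnvironmentClass(gym.Env):
    def __init__(self):
        # a small continuous observation vector and two discrete actions
        self.observation_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
        self.action_space = gym.spaces.Discrete(2)
        self._steps = 0

    def reset(self):
        self._steps = 0
        return np.zeros(4, dtype=np.float32)

    def step(self, action):
        self._steps += 1
        observation = np.random.uniform(-1.0, 1.0, size=4).astype(np.float32)
        reward = float(action)
        done = self._steps >= 200
        return observation, reward, done, {}

    def render(self, mode='human'):
        pass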
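# A minimal usage sketch, assuming the imports used above (VisualizationParameters, gym, np, etc.) are available
# in this module and that VisualizationParameters can be constructed with its defaults. 'CartPole-v0' is only an
# illustrative level; per the docstring, the level can be passed as a plain gym id string or a module:Class id.
# Reading env.reward and env.done mirrors the attributes that _take_action stores on each step.
if __name__ == '__main__':
    env = GymEnvironment(level='CartPole-v0',
                         frame_skip=1,
                         visualization_parameters=VisualizationParameters(),
                         seed=0)

    # step the environment a few times with the default action of the inferred action space
    for _ in range(10):
        env.step(env.action_space.default_action)
        print(env.reward, env.done)

    env.close()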