def __init__(self, domain_name, task_name, horizon, gamma, task_kwargs=None,
             dt=.01, width_screen=480, height_screen=480, camera_id=0):
    """
    Constructor.

    Args:
        domain_name (str): name of the environment;
        task_name (str): name of the task of the environment;
        horizon (int): the horizon;
        gamma (float): the discount factor;
        task_kwargs (dict, None): parameters of the task;
        dt (float, .01): duration of a control step;
        width_screen (int, 480): width of the screen;
        height_screen (int, 480): height of the screen;
        camera_id (int, 0): position of camera to render the environment;

    """
    # MDP creation.
    # Copy the task kwargs so the caller's dict is not mutated: the original
    # code wrote 'time_limit' directly into the argument, a surprising side
    # effect for callers that reuse the same dict across environments.
    task_kwargs = dict() if task_kwargs is None else dict(task_kwargs)
    task_kwargs['time_limit'] = np.inf  # Hack to ignore dm_control time limit.

    self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)

    # MDP properties
    action_space = self._convert_action_space(self.env.action_spec())
    observation_space = self._convert_observation_space(
        self.env.observation_spec())
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    self._viewer = ImageViewer((width_screen, height_screen), dt)
    self._camera_id = camera_id

    super().__init__(mdp_info)
def __init__(self, domain_name, task_name, horizon=None, gamma=0.99,
             task_kwargs=None, dt=.01, width_screen=480, height_screen=480,
             camera_id=0, use_pixels=False, pixels_width=64,
             pixels_height=64):
    """
    Constructor.

    Args:
        domain_name (str): name of the environment;
        task_name (str): name of the task of the environment;
        horizon (int): the horizon;
        gamma (float): the discount factor;
        task_kwargs (dict, None): parameters of the task;
        dt (float, .01): duration of a control step;
        width_screen (int, 480): width of the screen;
        height_screen (int, 480): height of the screen;
        camera_id (int, 0): position of camera to render the environment;
        use_pixels (bool, False): if True, pixel observations are used
            rather than the state vector;
        pixels_width (int, 64): width of the pixel observation;
        pixels_height (int, 64): height of the pixel observation;

    """
    # Build the dm_control environment, optionally wrapped so that
    # observations are rendered images instead of the state vector.
    env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)
    if use_pixels:
        render_kwargs = {'width': pixels_width, 'height': pixels_height}
        env = pixels.Wrapper(env, render_kwargs=render_kwargs)
    self.env = env

    # Fall back to the environment's own step limit when no horizon is
    # given, then disable dm_control's internal limit so the MDPInfo
    # horizon is the only one in effect.
    if horizon is None:
        horizon = self.env._step_limit
    self.env._step_limit = np.inf

    # Select the observation converters matching the observation type.
    if use_pixels:
        space_fn = self._convert_observation_space_pixels
        obs_fn = self._convert_observation_pixels
    else:
        space_fn = self._convert_observation_space_vector
        obs_fn = self._convert_observation_vector
    self._convert_observation_space = space_fn
    self._convert_observation = obs_fn

    # MDP properties
    action_space = self._convert_action_space(self.env.action_spec())
    observation_space = self._convert_observation_space(
        self.env.observation_spec())
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    self._viewer = ImageViewer((width_screen, height_screen), dt)
    self._camera_id = camera_id

    super().__init__(mdp_info)

    self._state = None
class DMControl(Environment):
    """
    Interface for dm_control suite Mujoco environments. It makes it
    possible to use every dm_control suite Mujoco environment just
    providing the necessary information.

    """
    def __init__(self, domain_name, task_name, horizon=None, gamma=0.99,
                 task_kwargs=None, dt=.01, width_screen=480,
                 height_screen=480, camera_id=0, use_pixels=False,
                 pixels_width=64, pixels_height=64):
        """
        Constructor.

        Args:
            domain_name (str): name of the environment;
            task_name (str): name of the task of the environment;
            horizon (int): the horizon;
            gamma (float): the discount factor;
            task_kwargs (dict, None): parameters of the task;
            dt (float, .01): duration of a control step;
            width_screen (int, 480): width of the screen;
            height_screen (int, 480): height of the screen;
            camera_id (int, 0): position of camera to render the
                environment;
            use_pixels (bool, False): if True, pixel observations are
                used rather than the state vector;
            pixels_width (int, 64): width of the pixel observation;
            pixels_height (int, 64): height of the pixel observation;

        """
        # MDP creation
        self.env = suite.load(domain_name, task_name,
                              task_kwargs=task_kwargs)
        if use_pixels:
            # Wrap so that observations become rendered RGB images.
            self.env = pixels.Wrapper(self.env,
                                      render_kwargs={
                                          'width': pixels_width,
                                          'height': pixels_height
                                      })

        # get the default horizon
        if horizon is None:
            horizon = self.env._step_limit  # NOTE(review): private dm_control attribute

        # Hack to ignore dm_control time limit.
        self.env._step_limit = np.inf

        # Pick the observation converters matching the observation type.
        if use_pixels:
            self._convert_observation_space = \
                self._convert_observation_space_pixels
            self._convert_observation = self._convert_observation_pixels
        else:
            self._convert_observation_space = \
                self._convert_observation_space_vector
            self._convert_observation = self._convert_observation_vector

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(
            self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info)

        self._state = None

    def reset(self, state=None):
        """
        Reset the environment to its initial state. Setting an arbitrary
        state is not supported by dm_control, so only ``state=None`` is
        accepted.

        Returns:
            The converted initial observation.

        """
        if state is None:
            self._state = self._convert_observation(
                self.env.reset().observation)
        else:
            raise NotImplementedError

        return self._state

    def step(self, action):
        """
        Apply ``action`` to the environment for one control step.

        Returns:
            The converted observation, the reward, the absorbing flag
            (dm_control's ``last()``) and an empty info dict.

        """
        step = self.env.step(action)

        reward = step.reward
        self._state = self._convert_observation(step.observation)
        absorbing = step.last()

        return self._state, reward, absorbing, {}

    def render(self):
        """
        Render the scene from the configured camera and display it in the
        viewer. Note the (height, width) order expected by
        ``physics.render``.

        """
        img = self.env.physics.render(self._viewer.size[1],
                                      self._viewer.size[0],
                                      self._camera_id)
        self._viewer.display(img)

    def stop(self):
        """
        Close the rendering viewer.

        """
        self._viewer.close()

    @staticmethod
    def _convert_observation_space_vector(observation_space):
        """
        Build a flat Box space whose size is the total number of scalar
        entries across all (possibly multi-dimensional) observation specs.

        """
        observation_shape = 0
        for i in observation_space:
            shape = observation_space[i].shape
            # Multiply out all dims; scalars (empty shape) count as 1.
            observation_var = 1
            for dim in shape:
                observation_var *= dim
            observation_shape += observation_var

        return Box(low=-np.inf, high=np.inf, shape=(observation_shape, ))

    @staticmethod
    def _convert_observation_space_pixels(observation_space):
        """
        Build a channel-first (3, H, W) Box space from the 'pixels' spec.

        """
        img_size = observation_space['pixels'].shape
        return Box(low=0., high=255., shape=(3, img_size[0], img_size[1]))

    @staticmethod
    def _convert_action_space(action_space):
        """
        Convert a dm_control bounded action spec into a Box space.

        """
        low = action_space.minimum
        high = action_space.maximum

        return Box(low=np.array(low), high=np.array(high))

    @staticmethod
    def _convert_observation_vector(observation):
        """
        Flatten and concatenate all entries of the observation dict into a
        single 1-D array.

        """
        obs = list()
        for i in observation:
            obs.append(np.atleast_1d(observation[i]).flatten())

        return np.concatenate(obs)

    @staticmethod
    def _convert_observation_pixels(observation):
        """
        Convert the (H, W, C) pixel observation to channel-first (C, H, W).

        """
        return observation['pixels'].transpose((2, 0, 1))
def __init__(self, config_file, horizon=None, gamma=0.99, is_discrete=False,
             width=None, height=None, debug_gui=False, verbose=False):
    """
    Constructor.

    Args:
        config_file (str): path to the YAML file specifying the task
            (see igibson/examples/configs/ and igibson/test/);
        horizon (int, None): the horizon;
        gamma (float, 0.99): the discount factor;
        is_discrete (bool, False): if True, actions are automatically
            discretized by iGibson's `set_up_discrete_action_space`.
            Please note that not all robots support discrete actions.
        width (int, None): width of the pixel observation. If None, the
            value specified in the config file is used;
        height (int, None): height of the pixel observation. If None,
            the value specified in the config file is used;
        debug_gui (bool, False): if True, activate the iGibson in GUI
            mode, showing the pybullet rendering and the robot camera.
        verbose (bool, False): if False, it disable iGibson default
            messages.

    """
    if not verbose:
        logging.disable(logging.CRITICAL + 1)  # Disable iGibson log messages

    # MDP creation
    self._not_pybullet = False
    self._first = True

    config = parse_config(config_file)
    config['is_discrete'] = is_discrete

    # Use the config default when no horizon is given, then bump the
    # internal step limit so the gym time limit never triggers before
    # the MDPInfo horizon.
    if horizon is None:
        horizon = config['max_step']
    config['max_step'] = horizon + 1  # Hack to ignore gym time limit

    if width is not None:
        config['image_width'] = width
    if height is not None:
        config['image_height'] = height

    mode = 'gui' if debug_gui else 'headless'
    wrapped = iGibsonWrapper(iGibsonEnv(config_file=config, mode=mode))
    self.env = wrapped

    self._img_size = wrapped.observation_space.shape[0:2]

    # MDP properties
    action_space = self.env.action_space
    observation_space = Box(
        low=0., high=255.,
        shape=(3, self._img_size[1], self._img_size[0]))
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Discrete actions arrive as length-1 arrays; unwrap them.
    if isinstance(action_space, Discrete):
        self._convert_action = lambda a: a[0]
    else:
        self._convert_action = lambda a: a

    self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                               1 / 60)
    self._image = None

    Environment.__init__(self, mdp_info)
class iGibson(Gym):
    """
    Interface for iGibson https://github.com/StanfordVL/iGibson

    There are both navigation and interaction tasks. Observations are pixel
    images of what the agent sees in front of itself. Image resolution is
    specified in the config file. By default, actions are continuous, but can
    be discretized automatically using a flag. Note that not all robots
    support discrete actions.

    Scene and task details are defined in the YAML config file.

    """
    def __init__(self, config_file, horizon=None, gamma=0.99,
                 is_discrete=False, width=None, height=None, debug_gui=False,
                 verbose=False):
        """
        Constructor.

        Args:
            config_file (str): path to the YAML file specifying the task
                (see igibson/examples/configs/ and igibson/test/);
            horizon (int, None): the horizon;
            gamma (float, 0.99): the discount factor;
            is_discrete (bool, False): if True, actions are automatically
                discretized by iGibson's `set_up_discrete_action_space`.
                Please note that not all robots support discrete actions.
            width (int, None): width of the pixel observation. If None,
                the value specified in the config file is used;
            height (int, None): height of the pixel observation. If None,
                the value specified in the config file is used;
            debug_gui (bool, False): if True, activate the iGibson in GUI
                mode, showing the pybullet rendering and the robot camera.
            verbose (bool, False): if False, it disable iGibson default
                messages.

        """
        if not verbose:
            # NOTE: this disables logging process-wide, not just for iGibson.
            logging.disable(logging.CRITICAL + 1)  # Disable iGibson log messages

        # MDP creation
        self._not_pybullet = False
        self._first = True

        config = parse_config(config_file)
        config['is_discrete'] = is_discrete

        if horizon is not None:
            config['max_step'] = horizon
        else:
            horizon = config['max_step']
            config['max_step'] = horizon + 1  # Hack to ignore gym time limit

        if width is not None:
            config['image_width'] = width
        if height is not None:
            config['image_height'] = height

        env = iGibsonEnv(config_file=config,
                         mode='gui' if debug_gui else 'headless')
        env = iGibsonWrapper(env)
        self.env = env

        # (height, width) of the pixel observation.
        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        # Channel-first (3, W, H) space matching _convert_observation.
        observation_space = Box(
            low=0., high=255., shape=(3, self._img_size[1],
                                      self._img_size[0]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Discrete actions arrive as length-1 arrays; unwrap them.
        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 60)
        self._image = None

        Environment.__init__(self, mdp_info)

    def reset(self, state=None):
        """
        Reset the environment. Setting an arbitrary state is not supported.

        Returns:
            The converted (channel-first) initial observation.

        """
        assert state is None, 'Cannot set iGibson state'
        return self._convert_observation(np.atleast_1d(self.env.reset()))

    def step(self, action):
        """
        Apply ``action`` for one step; keeps a copy of the raw frame for
        rendering.

        """
        action = self._convert_action(action)
        obs, reward, absorbing, info = self.env.step(action)
        self._image = obs.copy()
        return self._convert_observation(
            np.atleast_1d(obs)), reward, absorbing, info

    def close(self):
        """
        Close the underlying iGibson environment.

        """
        self.env.close()

    def stop(self):
        """
        Close the rendering viewer.

        """
        self._viewer.close()

    def render(self, mode='human'):
        """
        Display the last observed frame; ``mode`` is accepted for API
        compatibility but not used.

        """
        self._viewer.display(self._image)

    @staticmethod
    def _convert_observation(observation):
        """
        Convert an (H, W, C) image to channel-first (C, H, W).

        """
        return observation.transpose((2, 0, 1))

    @staticmethod
    def root_path():
        """
        Return the iGibson installation root path.

        """
        return igibson.root_path
def __init__(self, wrapper, config_file, base_config_file=None, horizon=None,
             gamma=0.99, width=None, height=None):
    """
    Constructor. For more details on how to pass YAML configuration files,
    please see <MUSHROOM_RL PATH>/examples/habitat/README.md

    Args:
        wrapper (str): wrapper for converting observations and actions
            (e.g., HabitatRearrangeWrapper);
        config_file (str): path to the YAML file specifying the RL task
            configuration (see <HABITAT_LAB PATH>/habitat_baselines/configs/);
        base_config_file (str, None): path to an optional YAML file, used
            as 'BASE_TASK_CONFIG_PATH' in the first YAML
            (see <HABITAT_LAB PATH>/configs/);
        horizon (int, None): the horizon;
        gamma (float, 0.99): the discount factor;
        width (int, None): width of the pixel observation. If None, the
            value specified in the config file is used.
        height (int, None): height of the pixel observation. If None, the
            value specified in the config file is used.

    """
    # MDP creation
    self._not_pybullet = False
    self._first = True

    if base_config_file is None:
        base_config_file = config_file

    config = get_config(config_paths=config_file,
                        opts=['BASE_TASK_CONFIG_PATH', base_config_file])

    config.defrost()

    if horizon is None:
        horizon = config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS  # Get the default horizon
    # Hack to ignore gym time limit: internal limit never fires before
    # the MDPInfo horizon.
    config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = horizon + 1

    # Overwrite all RGB width / height used for the TASK (not SIMULATOR)
    sim_config = config['TASK_CONFIG']['SIMULATOR']
    for sensor_key in sim_config:
        if 'rgb' not in sensor_key.lower():
            continue
        if height is not None:
            sim_config[sensor_key]['HEIGHT'] = height
        if width is not None:
            sim_config[sensor_key]['WIDTH'] = width

    config.freeze()

    env = make_env_fn(env_class=get_env_class(config.ENV_NAME),
                      config=config)
    # Look the wrapper class up by name and apply it.
    env = globals()[wrapper](env)
    self.env = env

    self._img_size = env.observation_space.shape[0:2]

    # MDP properties
    action_space = self.env.action_space
    observation_space = Box(
        low=0., high=255.,
        shape=(3, self._img_size[1], self._img_size[0]))
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Discrete actions arrive as length-1 arrays; unwrap them.
    if isinstance(action_space, Discrete):
        self._convert_action = lambda a: a[0]
    else:
        self._convert_action = lambda a: a

    self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                               1 / 10)

    Environment.__init__(self, mdp_info)
class Habitat(Gym):
    """
    Interface for Habitat RL environments.
    This class is very generic and can be used for many Habitat task. Depending
    on the robot / task, you have to use different wrappers, since observation
    and action spaces may vary.

    See <MUSHROOM_RL PATH>/examples/habitat/ for more details.

    """
    def __init__(self, wrapper, config_file, base_config_file=None,
                 horizon=None, gamma=0.99, width=None, height=None):
        """
        Constructor. For more details on how to pass YAML configuration files,
        please see <MUSHROOM_RL PATH>/examples/habitat/README.md

        Args:
            wrapper (str): wrapper for converting observations and actions
                (e.g., HabitatRearrangeWrapper);
            config_file (str): path to the YAML file specifying the RL task
                configuration (see <HABITAT_LAB PATH>/habitat_baselines/configs/);
            base_config_file (str, None): path to an optional YAML file, used
                as 'BASE_TASK_CONFIG_PATH' in the first YAML
                (see <HABITAT_LAB PATH>/configs/);
            horizon (int, None): the horizon;
            gamma (float, 0.99): the discount factor;
            width (int, None): width of the pixel observation. If None, the
                value specified in the config file is used.
            height (int, None): height of the pixel observation. If None, the
                value specified in the config file is used.

        """
        # MDP creation
        self._not_pybullet = False
        self._first = True

        if base_config_file is None:
            base_config_file = config_file

        config = get_config(config_paths=config_file,
                            opts=['BASE_TASK_CONFIG_PATH', base_config_file])

        config.defrost()

        if horizon is None:
            horizon = config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS  # Get the default horizon
        config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = horizon + 1  # Hack to ignore gym time limit

        # Overwrite all RGB width / height used for the TASK (not SIMULATOR)
        for k in config['TASK_CONFIG']['SIMULATOR']:
            if 'rgb' in k.lower():
                if height is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['HEIGHT'] = height
                if width is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['WIDTH'] = width

        config.freeze()

        env_class = get_env_class(config.ENV_NAME)
        env = make_env_fn(env_class=env_class, config=config)
        # Look the wrapper class up by name and apply it.
        env = globals()[wrapper](env)
        self.env = env

        # (height, width) of the pixel observation.
        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        # Channel-first space matching _convert_observation.
        observation_space = Box(
            low=0., high=255., shape=(3, self._img_size[1],
                                      self._img_size[0]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Discrete actions arrive as length-1 arrays; unwrap them.
        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 10)

        Environment.__init__(self, mdp_info)

    def reset(self, state=None):
        """
        Reset the environment. Setting an arbitrary state is not supported.

        Returns:
            The converted (channel-first) initial observation.

        """
        assert state is None, 'Cannot set Habitat state'
        obs = self._convert_observation(np.atleast_1d(self.env.reset()))
        return obs

    def step(self, action):
        """
        Apply ``action`` for one step and return the converted observation,
        reward, absorbing flag and info dict.

        """
        action = self._convert_action(action)
        obs, reward, absorbing, info = self.env.step(action)
        return self._convert_observation(
            np.atleast_1d(obs)), reward, absorbing, info

    def stop(self):
        """
        Close the rendering viewer.

        """
        self._viewer.close()

    def render(self, mode='rgb_array'):
        """
        Render the current full observation with metrics overlay. Only
        'rgb_array' mode is supported.

        Raises:
            ValueError: if ``mode`` is not 'rgb_array'.

        """
        if mode == "rgb_array":
            frame = observations_to_image(
                self.env._last_full_obs, self.env.unwrapped._env.get_metrics())
        else:
            raise ValueError(f"Render mode {mode} not currently supported.")

        self._viewer.display(frame)

    @staticmethod
    def _convert_observation(observation):
        """
        Convert an (H, W, C) image to channel-first (C, H, W).

        """
        return observation.transpose((2, 0, 1))

    @staticmethod
    def root_path():
        """
        Return the habitat-lab installation root path.

        """
        return os.path.dirname(os.path.dirname(habitat.__file__))
class DMControl(Environment):
    """
    Interface for dm_control suite Mujoco environments. It makes it
    possible to use every dm_control suite Mujoco environment just
    providing the necessary information.

    """
    def __init__(self, domain_name, task_name, horizon, gamma,
                 task_kwargs=None, dt=.01, width_screen=480,
                 height_screen=480, camera_id=0):
        """
        Constructor.

        Args:
            domain_name (str): name of the environment;
            task_name (str): name of the task of the environment;
            horizon (int): the horizon;
            gamma (float): the discount factor;
            task_kwargs (dict, None): parameters of the task;
            dt (float, .01): duration of a control step;
            width_screen (int, 480): width of the screen;
            height_screen (int, 480): height of the screen;
            camera_id (int, 0): position of camera to render the
                environment;

        """
        # MDP creation.
        # Copy the kwargs so the caller's dict is not mutated by the
        # time-limit override below.
        task_kwargs = dict() if task_kwargs is None else dict(task_kwargs)
        task_kwargs['time_limit'] = np.inf  # Hack to ignore dm_control time limit.

        self.env = suite.load(domain_name, task_name,
                              task_kwargs=task_kwargs)

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(
            self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info)

    def reset(self, state=None):
        """
        Reset the environment to its initial state. Setting an arbitrary
        state is not supported by dm_control.

        Returns:
            The converted initial observation.

        """
        if state is None:
            self._state = self._convert_observation(
                self.env.reset().observation)
        else:
            raise NotImplementedError

        return self._state

    def step(self, action):
        """
        Apply ``action`` to the environment for one control step.

        Returns:
            The converted observation, the reward, the absorbing flag
            (dm_control's ``last()``) and an empty info dict.

        """
        step = self.env.step(action)

        reward = step.reward
        self._state = self._convert_observation(step.observation)
        absorbing = step.last()

        return self._state, reward, absorbing, {}

    def render(self):
        """
        Render the scene from the configured camera and display it in the
        viewer. Note the (height, width) order expected by
        ``physics.render``.

        """
        img = self.env.physics.render(self._viewer.size[1],
                                      self._viewer.size[0],
                                      self._camera_id)
        self._viewer.display(img)

    def stop(self):
        """
        Close the rendering viewer. The original implementation was a
        no-op, leaking the viewer window opened in the constructor.

        """
        self._viewer.close()

    @staticmethod
    def _convert_observation_space(observation_space):
        """
        Build a flat Box space sized as the total number of scalar entries
        across all observation specs.

        The original only counted ``shape[0]``, under-counting specs with
        more than one dimension; all dims are multiplied out here (scalars,
        i.e. empty shapes, count as 1), matching ``_convert_observation``.

        """
        observation_shape = 0
        for i in observation_space:
            shape = observation_space[i].shape
            observation_var = 1
            for dim in shape:
                observation_var *= dim
            observation_shape += observation_var

        return Box(low=-np.inf, high=np.inf, shape=(observation_shape, ))

    @staticmethod
    def _convert_action_space(action_space):
        """
        Convert a dm_control bounded action spec into a Box space.

        """
        low = action_space.minimum
        high = action_space.maximum

        return Box(low=np.array(low), high=np.array(high))

    @staticmethod
    def _convert_observation(observation):
        """
        Flatten and concatenate all entries of the observation dict into a
        single 1-D array. Flattening keeps multi-dimensional entries
        consistent with the flat space built by
        ``_convert_observation_space``.

        """
        obs = list()
        for i in observation:
            obs.append(np.atleast_1d(observation[i]).flatten())

        return np.concatenate(obs)