import numpy as np
import pytest

from rl_coach.spaces import BoxActionSpace


def test_box():
    # simple action space
    action_space = BoxActionSpace(4, -5, 5, ["a", "b", "c", "d"])
    for i in range(100):
        sample = action_space.sample()
        assert np.all(-5 <= sample) and np.all(sample <= 5)
        assert sample.shape == (4,)
        assert sample.dtype == float

    # test clipping
    clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped_action == np.array([-5, 5, 2, 5]))

    # more complex high and low definition
    action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), np.array([1, 2, 4, 5]),
                                  ["a", "b", "c", "d"])
    for i in range(100):
        sample = action_space.sample()
        assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= np.array([1, 2, 4, 5]))
        assert sample.shape == (4,)
        assert sample.dtype == float

    # test clipping
    clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped_action == np.array([-5, 2, 2, 5]))

    # mixed high and low definition (low is per-dimension, high is a scalar)
    action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), 5, ["a", "b", "c", "d"])
    for i in range(100):
        sample = action_space.sample()
        assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= 5)
        assert sample.shape == (4,)
        assert sample.dtype == float

    # test clipping
    clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped_action == np.array([-5, 5, 2, 5]))

    # invalid bounds (high is below some of the low values)
    with pytest.raises(ValueError):
        action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), -1, ["a", "b", "c", "d"])
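# For reference, a minimal sketch of the clipping behavior the assertions above
# exercise, assuming clip_action_to_space simply clamps each action dimension to
# [low, high] via np.clip. This is an illustrative stand-in, not Coach's actual
# implementation; the function name is hypothetical.
def clip_action_to_space_sketch(action: np.ndarray, low, high) -> np.ndarray:
    # np.clip broadcasts scalar or per-dimension bounds against the action vector
    return np.clip(action, low, high)

# Example: clip_action_to_space_sketch(np.array([-10, 10, 2, 5]), -5, 5)
# -> array([-5, 5, 2, 5]), matching the first clipping assertion above.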
import random
from enum import Enum
from typing import Union

import numpy as np
from dm_control import suite
from dm_control.suite.wrappers import pixels

from rl_coach.base_parameters import VisualizationParameters
from rl_coach.environments.environment import Environment, LevelSelection
from rl_coach.spaces import BoxActionSpace, ImageObservationSpace, StateSpace, VectorObservationSpace


class ObservationType(Enum):
    Measurements = 1
    Image = 2
    Image_and_Measurements = 3


class ControlSuiteEnvironment(Environment):
    def __init__(self, level: LevelSelection, frame_skip: int,
                 visualization_parameters: VisualizationParameters,
                 target_success_rate: float = 1.0, seed: Union[None, int] = None,
                 human_control: bool = False,
                 observation_type: ObservationType = ObservationType.Measurements,
                 custom_reward_threshold: Union[int, float] = None, **kwargs):
        """
        :param level: (str)
            A string representing the control suite level to run. This can also be a LevelSelection object.
            For example, cartpole:swingup.

        :param frame_skip: (int)
            The number of frames to skip between any two actions given by the agent. The action will be repeated
            for all the skipped frames.

        :param visualization_parameters: (VisualizationParameters)
            The parameters used for visualizing the environment, such as the render flag, storing videos etc.

        :param target_success_rate: (float)
            Stop the experiment if the given target success rate is achieved.

        :param seed: (int)
            A seed to use for the random number generator when running the environment.

        :param human_control: (bool)
            A flag that allows controlling the environment using the keyboard keys.

        :param observation_type: (ObservationType)
            An enum which defines which observation to use. The current options are to use:
            * Measurements only - a vector of joint torques and similar measurements
            * Image only - an image of the environment as seen by a camera attached to the simulator
            * Measurements & Image - both types of observations will be returned in the state using the keys
              'measurements' and 'pixels' respectively.

        :param custom_reward_threshold: (float)
            Allows defining a custom reward that will be used to decide when the agent succeeded in passing the
            environment.
        """
        super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold,
                         visualization_parameters, target_success_rate)

        self.observation_type = observation_type

        # load and initialize environment
        domain_name, task_name = self.env_id.split(":")
        self.env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs={'random': seed})

        if observation_type != ObservationType.Measurements:
            self.env = pixels.Wrapper(self.env, pixels_only=observation_type == ObservationType.Image)

        # seed
        if self.seed is not None:
            np.random.seed(self.seed)
            random.seed(self.seed)

        self.state_space = StateSpace({})

        # image observations
        if observation_type != ObservationType.Measurements:
            self.state_space['pixels'] = ImageObservationSpace(shape=self.env.observation_spec()['pixels'].shape,
                                                               high=255)

        # measurements observations
        if observation_type != ObservationType.Image:
            measurements_space_size = 0
            measurements_names = []
            for observation_space_name, observation_space in self.env.observation_spec().items():
                if len(observation_space.shape) == 0:
                    measurements_space_size += 1
                    measurements_names.append(observation_space_name)
                elif len(observation_space.shape) == 1:
                    measurements_space_size += observation_space.shape[0]
                    measurements_names.extend(["{}_{}".format(observation_space_name, i)
                                               for i in range(observation_space.shape[0])])
            self.state_space['measurements'] = VectorObservationSpace(shape=measurements_space_size,
                                                                      measurements_names=measurements_names)

        # actions
        self.action_space = BoxActionSpace(
            shape=self.env.action_spec().shape[0],
            low=self.env.action_spec().minimum,
            high=self.env.action_spec().maximum
        )

        # initialize the state by getting a new state from the environment
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            scale = 1
            if self.human_control:
                scale = 2
            if not self.native_rendering:
                self.renderer.create_screen(image.shape[1] * scale, image.shape[0] * scale)

        self.target_success_rate = target_success_rate

    def _update_state(self):
        self.state = {}

        if self.observation_type != ObservationType.Measurements:
            self.pixels = self.last_result.observation['pixels']
            self.state['pixels'] = self.pixels

        if self.observation_type != ObservationType.Image:
            self.measurements = np.array([])
            for sub_observation in self.last_result.observation.values():
                if isinstance(sub_observation, np.ndarray) and len(sub_observation.shape) == 1:
                    self.measurements = np.concatenate((self.measurements, sub_observation))
                else:
                    self.measurements = np.concatenate((self.measurements, np.array([sub_observation])))
            self.state['measurements'] = self.measurements

        self.reward = self.last_result.reward if self.last_result.reward is not None else 0
        self.done = self.last_result.last()

    def _take_action(self, action):
        if type(self.action_space) == BoxActionSpace:
            action = self.action_space.clip_action_to_space(action)

        self.last_result = self.env.step(action)

    def _restart_environment_episode(self, force_environment_reset=False):
        self.last_result = self.env.reset()

    def _render(self):
        pass

    def get_rendered_image(self):
        return self.env.physics.render(camera_id=0)

    def get_target_success_rate(self) -> float:
        return self.target_success_rate
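# A hedged usage sketch: constructing the environment for dm_control's
# "cartpole:swingup" task and stepping it with random actions. Parameter values
# are illustrative assumptions; VisualizationParameters defaults are assumed to
# be sufficient for a headless run.
if __name__ == "__main__":
    env = ControlSuiteEnvironment(level="cartpole:swingup",
                                  frame_skip=1,
                                  visualization_parameters=VisualizationParameters(),
                                  seed=42)
    for _ in range(10):
        env.step(env.action_space.sample())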
import random
from collections import namedtuple
from typing import Any, Dict, Union

import numpy as np
import robosuite
from robosuite.wrappers import Wrapper, DomainRandomizationWrapper

from rl_coach.base_parameters import VisualizationParameters
from rl_coach.environments.environment import Environment, LevelSelection
from rl_coach.spaces import BoxActionSpace, PlanarMapsObservationSpace, StateSpace, VectorObservationSpace

# RobosuiteBaseParameters, robosuite_environments, robosuite_robots, robosuite_controllers and
# DEFAULT_REWARD_SCALES are assumed to be defined alongside this class in the same module.
RobosuiteStepResult = namedtuple('RobosuiteStepResult', ['observation', 'reward', 'done', 'info'])


class RobosuiteEnvironment(Environment):
    def __init__(self, level: LevelSelection,
                 seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float, None],
                 visualization_parameters: VisualizationParameters,
                 base_parameters: RobosuiteBaseParameters,
                 extra_parameters: Dict[str, Any],
                 robot: str, controller: str,
                 target_success_rate: float = 1.0, apply_dr: bool = False,
                 dr_every_n_steps_min: int = 10, dr_every_n_steps_max: int = 20,
                 use_joint_vel_obs=False, custom_controller_config_fpath=None, **kwargs):
        super(RobosuiteEnvironment, self).__init__(level, seed, frame_skip, human_control, custom_reward_threshold,
                                                   visualization_parameters, target_success_rate)

        # Validate arguments
        self.frame_skip = max(1, self.frame_skip)

        def validate_input(input, supported, name):
            if input not in supported:
                raise ValueError("Unknown Robosuite {0} passed: '{1}' ; Supported {0}s are: {2}".format(
                    name, input, ' | '.join(supported)
                ))

        validate_input(self.env_id, robosuite_environments, 'environment')
        validate_input(robot, robosuite_robots, 'robot')
        self.robot = robot
        if controller is not None:
            validate_input(controller, robosuite_controllers, 'controller')
        self.controller = controller

        self.base_parameters = base_parameters
        self.base_parameters.has_renderer = self.is_rendered and self.native_rendering
        self.base_parameters.has_offscreen_renderer = self.base_parameters.use_camera_obs or \
            (self.is_rendered and not self.native_rendering)

        # Seed
        if self.seed is not None:
            np.random.seed(self.seed)
            random.seed(self.seed)

        # Load and initialize environment
        env_args = self.base_parameters.env_kwargs_dict()
        env_args.update(extra_parameters)

        if 'reward_scale' not in env_args and self.env_id in DEFAULT_REWARD_SCALES:
            env_args['reward_scale'] = DEFAULT_REWARD_SCALES[self.env_id]

        env_args['robots'] = self.robot
        controller_cfg = None
        if self.controller is not None:
            controller_cfg = robosuite.controllers.load_controller_config(default_controller=self.controller)
        elif custom_controller_config_fpath is not None:
            controller_cfg = robosuite.controllers.load_controller_config(custom_fpath=custom_controller_config_fpath)

        env_args['controller_configs'] = controller_cfg

        self.env = robosuite.make(self.env_id, **env_args)

        # TODO: Generalize this to filter any observation by name
        if not use_joint_vel_obs:
            self.env.modify_observable('robot0_joint_vel', 'active', False)

        # Wrap with a dummy wrapper so we get a consistent API (there are subtle changes between
        # wrappers and actual environments in Robosuite, for example action_spec as property vs. function)
        self.env = Wrapper(self.env)
        if apply_dr:
            self.env = DomainRandomizationWrapper(self.env, seed=self.seed,
                                                  randomize_every_n_steps_min=dr_every_n_steps_min,
                                                  randomize_every_n_steps_max=dr_every_n_steps_max)

        # State space
        self.state_space = self._setup_state_space()

        # Action space
        low, high = self.env.unwrapped.action_spec
        self.action_space = BoxActionSpace(low.shape, low=low, high=high)

        self.reset_internal_state()

        if self.is_rendered:
            image = self.get_rendered_image()
            self.renderer.create_screen(image.shape[1], image.shape[0])
        # TODO: Other environments call rendering here, why? reset_internal_state does it

    def _setup_state_space(self):
        state_space = StateSpace({})
        dummy_obs = self._process_observation(self.env.observation_spec())

        state_space['measurements'] = VectorObservationSpace(dummy_obs['measurements'].shape[0])

        if self.base_parameters.use_camera_obs:
            state_space['camera'] = PlanarMapsObservationSpace(dummy_obs['camera'].shape, 0, 255)

        return state_space

    def _process_observation(self, raw_obs):
        new_obs = {}

        # TODO: Support multiple cameras, this assumes a single camera
        camera_name = self.base_parameters.camera_names

        camera_obs = raw_obs.get(camera_name + '_image', None)
        if camera_obs is not None:
            depth_obs = raw_obs.get(camera_name + '_depth', None)
            if depth_obs is not None:
                depth_obs = np.expand_dims(depth_obs, axis=2)
                camera_obs = np.concatenate([camera_obs, depth_obs], axis=2)
            new_obs['camera'] = camera_obs

        measurements = raw_obs['robot0_proprio-state']
        object_obs = raw_obs.get('object-state', None)
        if object_obs is not None:
            measurements = np.concatenate([measurements, object_obs])
        new_obs['measurements'] = measurements

        return new_obs

    def _take_action(self, action):
        action = self.action_space.clip_action_to_space(action)

        # We mimic the "action_repeat" mechanism of RobosuiteWrapper in Surreal.
        # Same concept as frame_skip, only returning the average reward across repeated actions instead
        # of the total reward.
        rewards = []
        for _ in range(self.frame_skip):
            obs, reward, done, info = self.env.step(action)
            rewards.append(reward)
            if done:
                break
        reward = np.mean(rewards)
        self.last_result = RobosuiteStepResult(obs, reward, done, info)

    def _update_state(self):
        obs = self._process_observation(self.last_result.observation)
        self.state = {k: obs[k] for k in self.state_space.sub_spaces}
        self.reward = self.last_result.reward or 0
        self.done = self.last_result.done
        self.info = self.last_result.info

    def _restart_environment_episode(self, force_environment_reset=False):
        reset_obs = self.env.reset()
        self.last_result = RobosuiteStepResult(reset_obs, 0.0, False, {})

    def _render(self):
        self.env.render()

    def get_rendered_image(self):
        img: np.ndarray = self.env.sim.render(camera_name=self.base_parameters.render_camera,
                                              height=512, width=512, depth=False)
        return np.flip(img, 0)

    def close(self):
        self.env.close()
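# The action-repeat loop in _take_action averages rewards across the repeated
# steps rather than summing them, mirroring Surreal's RobosuiteWrapper. A
# standalone sketch of that semantics, assuming a generic gym-style step
# function; the helper name is hypothetical.
def repeat_action_sketch(step_fn, action, n_repeats):
    rewards = []
    for _ in range(n_repeats):
        obs, reward, done, info = step_fn(action)
        rewards.append(reward)
        if done:  # stop early on episode termination; average over the steps actually taken
            break
    return obs, float(np.mean(rewards)), done, info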