def test_filter():
    filter = BoxMasking(10, 20)

    # passing an output space that is wrong
    with pytest.raises(ValueError):
        filter.validate_output_action_space(DiscreteActionSpace(10))

    # 1 dimensional box
    output_space = BoxActionSpace(1, 5, 30)
    input_space = filter.get_unfiltered_action_space(output_space)

    action = np.array([2])
    result = filter.filter(action)
    assert result == np.array([12])
    assert output_space.contains(result)
def test_filter():
    filter = AttentionDiscretization(2)

    # passing an output space that is wrong
    with pytest.raises(ValueError):
        filter.validate_output_action_space(DiscreteActionSpace(10))
    with pytest.raises(ValueError):
        filter.validate_output_action_space(BoxActionSpace(10))

    # 1 dimensional box
    output_space = AttentionActionSpace(2, 0, 83)
    input_space = filter.get_unfiltered_action_space(output_space)
    assert np.all(filter.target_actions == np.array([[[0., 0.], [41.5, 41.5]],
                                                     [[0., 41.5], [41.5, 83.]],
                                                     [[41.5, 0], [83., 41.5]],
                                                     [[41.5, 41.5], [83., 83.]]]))
    assert input_space.actions == list(range(4))

    action = 2
    result = filter.filter(action)
    assert np.all(result == np.array([[41.5, 0], [83., 41.5]]))
    assert output_space.contains(result)

    # force int bins
    filter = AttentionDiscretization(2, force_int_bins=True)
    input_space = filter.get_unfiltered_action_space(output_space)
    assert np.all(filter.target_actions == np.array([[[0., 0.], [41, 41]],
                                                     [[0., 41], [41, 83.]],
                                                     [[41, 0], [83., 41]],
                                                     [[41, 41], [83., 83.]]]))
def get_unfiltered_action_space(self, output_action_space: AttentionActionSpace) -> DiscreteActionSpace:
    if isinstance(self.num_bins_per_dimension, int):
        self.num_bins_per_dimension = [self.num_bins_per_dimension] * output_action_space.shape[0]

    # create a discrete to linspace map to ease the extraction of attention actions
    discrete_to_box = BoxDiscretization([n + 1 for n in self.num_bins_per_dimension],
                                        self.force_int_bins)
    discrete_to_box.get_unfiltered_action_space(
        BoxActionSpace(output_action_space.shape,
                       output_action_space.low,
                       output_action_space.high))

    rows, cols = self.num_bins_per_dimension
    start_ind = [i * (cols + 1) + j
                 for i in range(rows + 1) if i < rows
                 for j in range(cols + 1) if j < cols]
    end_ind = [i + cols + 2 for i in start_ind]
    self.target_actions = [np.array([discrete_to_box.target_actions[start],
                                     discrete_to_box.target_actions[end]])
                           for start, end in zip(start_ind, end_ind)]

    return super().get_unfiltered_action_space(output_action_space)
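# Worked example (a sketch added for clarity, not part of the original source): for the
# 2x2 case used in the AttentionDiscretization test earlier in this section, over a
# [0, 83] x [0, 83] box, the helper BoxDiscretization produces a 3x3 grid of corner points,
# flattened row-major as indices 0..8. start_ind = [0, 1, 3, 4] picks each cell's low corner
# and end_ind = start_ind + cols + 2 = [4, 5, 7, 8] picks the diagonally opposite high
# corner, which yields the four (low, high) attention boxes asserted in the test:
#   [[0, 0], [41.5, 41.5]], [[0, 41.5], [41.5, 83]], [[41.5, 0], [83, 41.5]], [[41.5, 41.5], [83, 83]]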
def get_unfiltered_action_space(self, output_action_space: BoxActionSpace) -> BoxActionSpace:
    self.output_action_space = output_action_space
    self.input_action_space = BoxActionSpace(
        output_action_space.shape,
        low=0,
        high=self.masked_target_space_high - self.masked_target_space_low)
    return self.input_action_space
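# Usage sketch (illustrative, using only names that appear in the snippets in this section):
# the agent-facing space returned above always starts at 0 and spans
# (masked_target_space_high - masked_target_space_low); filter() then shifts the agent's
# action by masked_target_space_low, e.g.
#   masking = BoxMasking(10, 20)
#   agent_space = masking.get_unfiltered_action_space(BoxActionSpace(1, 5, 30))  # box over [0, 10]
#   masking.filter(np.array([2]))                                                # -> np.array([12])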
def test_filter():
    filter = LinearBoxToBoxMap(10, 20)

    # passing an output space that is wrong
    with pytest.raises(ValueError):
        filter.validate_output_action_space(DiscreteActionSpace(10))

    # 1 dimensional box
    output_space = BoxActionSpace(1, 5, 35)
    input_space = filter.get_unfiltered_action_space(output_space)

    action = np.array([12])
    result = filter.filter(action)
    assert result == np.array([11])
    assert output_space.val_matches_space_definition(result)
def test_init():
    # discrete control
    action_space = DiscreteActionSpace(3)
    noise_schedule = LinearSchedule(1.0, 1.0, 1000)

    # additive noise requires a bounded range for the actions
    action_space = BoxActionSpace(np.array([10]))
    with pytest.raises(ValueError):
        policy = AdditiveNoise(action_space, noise_schedule, 0)
def get_unfiltered_action_space(self, output_action_space: BoxActionSpace) -> BoxActionSpace:
    self.input_action_space = BoxActionSpace(output_action_space.shape,
                                             self.input_space_low,
                                             self.input_space_high)
    self.rescale = (output_action_space.high - output_action_space.low) / \
                   (self.input_space_high - self.input_space_low)
    self.offset = output_action_space.low - self.input_space_low
    self.output_action_space = output_action_space
    return self.input_action_space
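# Sketch of the affine map implied by the fields computed above (the filter() body itself
# is not shown in this section); it is consistent with the LinearBoxToBoxMap test, where
# mapping [10, 20] onto [5, 35] gives rescale = 3 and filter(np.array([12])) -> np.array([11.]):
def _linear_box_to_box(action, input_low, input_high, output_low, output_high):
    # rescale and shift the action from the input range onto the output range
    rescale = (output_high - output_low) / (input_high - input_low)
    return (action - input_low) * rescale + output_low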
def set_masking(self,
                masked_target_space_low: Union[None, int, float, np.ndarray],
                masked_target_space_high: Union[None, int, float, np.ndarray]):
    self.masked_target_space_low = masked_target_space_low
    self.masked_target_space_high = masked_target_space_high
    self.offset = masked_target_space_low
    if self.output_action_space:
        self.validate_output_action_space(self.output_action_space)
        self.input_action_space = BoxActionSpace(
            self.output_action_space.shape,
            low=0,
            high=self.masked_target_space_high - self.masked_target_space_low)
def get_action_space(self, json_actions):
    """Return the action space for the training algorithm

    Args:
        json_actions (dict): The json object containing the value of action_space key in model_metadata.json

    Returns:
        ActionSpace: Action space object for the particular training algorithm
    """
    try:
        # action_space for SAC training algorithm is assumed to be a dict with the format:
        # action_space:{steering_angle:{low:-30.0 , high:30.0}, speed:{low:0.1, high:4.0}}
        if (json_actions[ModelMetadataKeys.STEERING_ANGLE.value][ModelMetadataKeys.LOW.value] >=
                json_actions[ModelMetadataKeys.STEERING_ANGLE.value][ModelMetadataKeys.HIGH.value]
                or json_actions[ModelMetadataKeys.SPEED.value][ModelMetadataKeys.LOW.value] >=
                json_actions[ModelMetadataKeys.SPEED.value][ModelMetadataKeys.HIGH.value]):
            log_and_exit("Action space bounds are incorrect while defining SACActionSpaceConfig. "
                         "json_actions: {}".format(json_actions),
                         SIMAPP_SIMULATION_WORKER_EXCEPTION,
                         SIMAPP_EVENT_ERROR_CODE_500)
        low = np.array([
            json_actions[ModelMetadataKeys.STEERING_ANGLE.value][ModelMetadataKeys.LOW.value],
            json_actions[ModelMetadataKeys.SPEED.value][ModelMetadataKeys.LOW.value]])
        high = np.array([
            json_actions[ModelMetadataKeys.STEERING_ANGLE.value][ModelMetadataKeys.HIGH.value],
            json_actions[ModelMetadataKeys.SPEED.value][ModelMetadataKeys.HIGH.value]])
        action_space = BoxActionSpace(2, low, high,
                                      default_action=0.5 * (high + low))
        return action_space
    except Exception as ex:
        log_and_exit("Error while getting action space in SACActionSpaceConfig: {}".format(ex),
                     SIMAPP_SIMULATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_500)
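# Example input for get_action_space(), following the format documented in the docstring
# above (the numeric values are only illustrative):
#   {
#       "steering_angle": {"low": -30.0, "high": 30.0},
#       "speed": {"low": 0.1, "high": 4.0}
#   }
# For this input the returned space is a 2-dimensional BoxActionSpace with
# low = [-30.0, 0.1], high = [30.0, 4.0] and default_action = 0.5 * (high + low) = [0.0, 2.05].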
def test_get_action():
    # discrete control
    action_space = DiscreteActionSpace(3)
    policy = Greedy(action_space)
    best_action, _ = policy.get_action(np.array([10, 20, 30]))
    assert best_action == 2

    # continuous control
    action_space = BoxActionSpace(np.array([10]))
    policy = Greedy(action_space)
    best_action = policy.get_action(np.array([1, 1, 1]))
    assert np.all(best_action == np.array([1, 1, 1]))
def test_get_action():
    # make sure noise is in range
    action_space = BoxActionSpace(np.array([10]), -1, 1)
    noise_schedule = LinearSchedule(1.0, 1.0, 1000)
    policy = AdditiveNoise(action_space, noise_schedule, 0)

    # the action range is 2, so there is a ~0.1% chance that the noise will be larger than 3*std=3*2=6
    for i in range(1000):
        action = policy.get_action(np.zeros([10]))
        assert np.all(action < 10)

        # make sure there is no clipping of the action since it should be the environment that clips actions
        assert np.all(action != 1.0)
        assert np.all(action != -1.0)

        # make sure that each action element has a different value
        assert np.all(action[0] != action[1:])
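# Reading of the comment in the test above (a sketch of the intent, not the AdditiveNoise
# implementation): with a noise schedule value of 1.0 the noise std is roughly
# 1.0 * (high - low) = 2, so a sample more than 3 std (= 6) away from the mean is already
# very unlikely, and the assertion only checks the loose bound of 10, e.g.
#   noise = np.random.normal(loc=0.0, scale=1.0 * (1 - (-1)), size=10)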
def test_filter():
    filter = BoxDiscretization(9)

    # passing an output space that is wrong
    with pytest.raises(ValueError):
        filter.validate_output_action_space(DiscreteActionSpace(10))

    # 1 dimensional box
    output_space = BoxActionSpace(1, 5, 15)
    input_space = filter.get_unfiltered_action_space(output_space)
    assert filter.target_actions == [[5.], [6.25], [7.5], [8.75], [10.],
                                     [11.25], [12.5], [13.75], [15.]]
    assert input_space.actions == list(range(9))

    action = 2
    result = filter.filter(action)
    assert result == [7.5]
    assert output_space.contains(result)

    # 2 dimensional box
    filter = BoxDiscretization(3)
    output_space = BoxActionSpace(2, 5, 15)
    input_space = filter.get_unfiltered_action_space(output_space)
    assert filter.target_actions == [[5., 5.], [5., 10.], [5., 15.],
                                     [10., 5.], [10., 10.], [10., 15.],
                                     [15., 5.], [15., 10.], [15., 15.]]
    assert input_space.actions == list(range(9))

    action = 2
    result = filter.filter(action)
    assert result == [5., 15.]
    assert output_space.contains(result)
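# The grids asserted in the BoxDiscretization test above can be reproduced with
# numpy/itertools (a sketch added for clarity, not the library's implementation):
import itertools
import numpy as np

def _discretization_grid(low, high, bins, dims):
    # evenly spaced points along each axis, then the cartesian product over all dimensions
    axis = np.linspace(low, high, bins)
    return [list(point) for point in itertools.product(axis, repeat=dims)]

# _discretization_grid(5, 15, 9, 1) -> [[5.0], [6.25], ..., [15.0]]
# _discretization_grid(5, 15, 3, 2) -> [[5.0, 5.0], [5.0, 10.0], ..., [15.0, 15.0]]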
def __init__( self, level: LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: VisualizationParameters, host: str, port: int, timeout: float, number_of_vehicles: int, number_of_walkers: int, weather_id: int, #rendering_mode: bool, ego_vehicle_filter: str, display_size: int, sensors: List[SensorTypes], camera_height: int, camera_width: int, lidar_bin: float, obs_range: float, display_route: bool, render_pygame: bool, d_behind: float, out_lane_thres: float, desired_speed: float, max_past_step: int, dt: float, discrete: bool, discrete_acc: List[float], discrete_steer: List[float], continuous_accel_range: List[float], continuous_steer_range: List[float], max_ego_spawn_times: int, max_time_episode: int, max_waypt: int, **kwargs): super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters) self.level = level # self.frame_skip = frame_skip # self.seed = seed # self.human_control = human_control # self.custom_reward_threshold = custom_reward_threshold # self.visualization_paramters = visualization_parameters self.host = host self.port = port self.timeout = timeout self.number_of_vehicles = number_of_vehicles self.number_of_walkers = number_of_walkers self.weather_id = weather_id self.ego_vehicle_filter = ego_vehicle_filter self.display_size = display_size self.sensors = sensors self.camera_height = camera_height self.camera_width = camera_width self.obs_range = obs_range self.lidar_bin = lidar_bin self.obs_size = int(self.obs_range / self.lidar_bin) self.display_route = display_route self.render_pygame = render_pygame self.d_behind = d_behind self.out_lane_thres = out_lane_thres self.desired_speed = desired_speed self.max_past_step = max_past_step self.dt = dt self.discrete = discrete self.discrete_acc = discrete_acc self.discrete_steer = discrete_steer self.continuous_accel_range = continuous_accel_range self.continuous_steer_range = continuous_steer_range self.max_ego_spawn_times = max_ego_spawn_times self.max_time_episode = max_time_episode self.max_waypt = max_waypt # Connect to carla server and get world object print('connecting to Carla server...') self.client = carla.Client(self.host, self.port) self.client.set_timeout(self.timeout) self.traffic_manager = self.client.get_trafficmanager() self.world = self.client.load_world(level) print('Carla server connected!') # Set weather self.world.set_weather(CARLA_WEATHER_PRESETS[self.weather_id]) # Get spawn points self._get_spawn_points() # Create the ego vehicle blueprint self.ego_bp = self._create_vehicle_bluepprint(self.ego_vehicle_filter, color='49,8,8') # Collision sensor self.collision_hist = [] # The collision history self.collision_hist_l = 1 # collision history length self.collision_bp = self.world.get_blueprint_library().find( 'sensor.other.collision') # Lidar sensor self.lidar_data = None self.lidar_height = 2.1 self.lidar_trans = carla.Transform( carla.Location(x=0.0, z=self.lidar_height)) self.lidar_bp = self.world.get_blueprint_library().find( 'sensor.lidar.ray_cast') self.lidar_bp.set_attribute('channels', '32') self.lidar_bp.set_attribute('range', '5000') # Camera sensor self.camera_img = np.zeros((self.camera_height, self.camera_width, 3), dtype=np.uint8) self.camera_trans = carla.Transform(carla.Location(x=0.8, z=1.7)) self.camera_bp = self.world.get_blueprint_library().find( 'sensor.camera.rgb') # Modify the attributes of the blueprint to set image resolution and field of view. 
self.camera_bp.set_attribute('image_size_x', str(self.camera_width)) self.camera_bp.set_attribute('image_size_y', str(self.camera_height)) self.camera_bp.set_attribute('fov', '110') # Set the time in seconds between sensor captures self.camera_bp.set_attribute('sensor_tick', '0.02') # Set fixed simulation step for synchronous mode self.settings = self.world.get_settings() self.settings.fixed_delta_seconds = self.dt #self.settings.no_rendering_mode = rendering_mode self._set_synchronous_mode(True) # Record the time of total steps and resetting steps self.reset_step = 0 self.total_step = 0 # Action space if self.discrete: self.discrete_act = [discrete_acc, discrete_steer] self.n_acc = len(self.discrete_act[0]) self.n_steer = len(self.discrete_act[1]) self.action_space = DiscreteActionSpace( num_actions=self.n_acc * self.n_steer, descriptions=["acceleration", "steering"]) else: self.action_space = BoxActionSpace( shape=2, low=np.array( [continuous_accel_range[0], continuous_steer_range[0]]), high=np.array( [continuous_accel_range[1], continuous_steer_range[1]]), descriptions=["acceleration", "steering"]) # Observation space self.state_space = StateSpace({ "measurements": VectorObservationSpace(shape=4, low=np.array([-2, -1, -5, 0]), high=np.array([2, 1, 30, 1]), measurements_names=[ "lat_dist", "heading_error", "ego_speed", "safety_margin" ]) }) if SensorTypes.FRONT_CAMERA in self.sensors: self.state_space[ SensorTypes.FRONT_CAMERA.value] = ImageObservationSpace( shape=np.array([self.camera_height, self.camera_width, 3]), high=255) if SensorTypes.LIDAR in self.sensors: self.state_space[ SensorTypes.LIDAR.value] = PlanarMapsObservationSpace( shape=np.array([self.obs_size, self.obs_size, 3]), low=0, high=255) if SensorTypes.BIRDEYE in self.sensors: self.state_space[ SensorTypes.BIRDEYE.value] = ImageObservationSpace( shape=np.array([self.obs_size, self.obs_size, 3]), high=255) # Initialize the renderer self._init_renderer() self.reset_internal_state(True)
def test_box():
    # simple action space
    action_space = BoxActionSpace(4, -5, 5, ["a", "b", "c", "d"])
    for i in range(100):
        sample = action_space.sample()
        assert np.all(-5 <= sample) and np.all(sample <= 5)
        assert sample.shape == (4,)
        assert sample.dtype == float

    # test clipping
    clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped_action == np.array([-5, 5, 2, 5]))

    # more complex high and low definition
    action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), np.array([1, 2, 4, 5]),
                                  ["a", "b", "c", "d"])
    for i in range(100):
        sample = action_space.sample()
        assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= np.array([1, 2, 4, 5]))
        assert sample.shape == (4,)
        assert sample.dtype == float

    # test clipping
    clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped_action == np.array([-5, 2, 2, 5]))

    # mixed high and low definition
    action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), 5, ["a", "b", "c", "d"])
    for i in range(100):
        sample = action_space.sample()
        assert np.all(np.array([-5, -1, -0.5, 0]) <= sample) and np.all(sample <= 5)
        assert sample.shape == (4,)
        assert sample.dtype == float

    # test clipping
    clipped_action = action_space.clip_action_to_space(np.array([-10, 10, 2, 5]))
    assert np.all(clipped_action == np.array([-5, 5, 2, 5]))

    # invalid bounds
    with pytest.raises(ValueError):
        action_space = BoxActionSpace(4, np.array([-5, -1, -0.5, 0]), -1, ["a", "b", "c", "d"])
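# Conceptually, clip_action_to_space() behaves like an elementwise np.clip against the
# space bounds (a sketch consistent with the assertions above, not the library source):
#   np.clip(np.array([-10, 10, 2, 5]), -5, 5)                            # -> [-5, 5, 2, 5]
#   np.clip(np.array([-10, 10, 2, 5]), [-5, -1, -0.5, 0], [1, 2, 4, 5])  # -> [-5, 2, 2, 5]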
def __init__(self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters, additional_simulator_parameters: Dict[str, Any] = None, seed: Union[None, int] = None, human_control: bool = False, custom_reward_threshold: Union[int, float] = None, random_initialization_steps: int = 1, max_over_num_frames: int = 1, **kwargs): super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters) self.random_initialization_steps = random_initialization_steps self.max_over_num_frames = max_over_num_frames self.additional_simulator_parameters = additional_simulator_parameters # hide warnings gym.logger.set_level(40) """ load and initialize environment environment ids can be defined in 3 ways: 1. Native gym environments like BreakoutDeterministic-v0 for example 2. Custom gym environments written and installed as python packages. This environments should have a python module with a class inheriting gym.Env, implementing the relevant functions (_reset, _step, _render) and defining the observation and action space For example: my_environment_package:MyEnvironmentClass will run an environment defined in the MyEnvironmentClass class 3. Custom gym environments written as an independent module which is not installed. This environments should have a python module with a class inheriting gym.Env, implementing the relevant functions (_reset, _step, _render) and defining the observation and action space. For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an environment defined in the MyEnvironmentClass class which is located in the module in the relative path path_to_my_environment.sub_directory.my_module """ if ':' in self.env_id: # custom environments if '/' in self.env_id or '.' in self.env_id: # environment in a an absolute path module written as a unix path or in a relative path module # written as a python import path env_class = short_dynamic_import(self.env_id) else: # environment in a python package env_class = gym.envs.registration.load(self.env_id) # instantiate the environment if self.additional_simulator_parameters: self.env = env_class(**self.additional_simulator_parameters) else: self.env = env_class() else: self.env = gym.make(self.env_id) # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows environment_to_always_use_with_native_rendering = [ 'classic_control', 'mujoco', 'robotics' ] self.native_rendering = self.native_rendering or \ any([env in str(self.env.unwrapped.__class__) for env in environment_to_always_use_with_native_rendering]) if self.native_rendering: if hasattr(self, 'renderer'): self.renderer.close() # seed if self.seed is not None: self.env.seed(self.seed) np.random.seed(self.seed) random.seed(self.seed) # frame skip and max between consecutive frames self.is_robotics_env = 'robotics' in str(self.env.unwrapped.__class__) self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__) self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__) self.timelimit_env_wrapper = self.env if self.is_atari_env: self.env.unwrapped.frameskip = 1 # this accesses the atari env that is wrapped with a timelimit wrapper env if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4: screen.warning( "Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. " "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the " "laser rays disappear. 
To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4." ) self.frame_skip = 3 self.env = MaxOverFramesAndFrameskipEnvWrapper( self.env, frameskip=self.frame_skip, max_over_num_frames=self.max_over_num_frames) else: self.env.unwrapped.frameskip = self.frame_skip self.state_space = StateSpace({}) # observations if not isinstance(self.env.observation_space, gym.spaces.dict_space.Dict): state_space = {'observation': self.env.observation_space} else: state_space = self.env.observation_space.spaces for observation_space_name, observation_space in state_space.items(): if len(observation_space.shape ) == 3 and observation_space.shape[-1] == 3: # we assume gym has image observations which are RGB and where their values are within 0-255 self.state_space[ observation_space_name] = ImageObservationSpace( shape=np.array(observation_space.shape), high=255, channels_axis=-1) else: self.state_space[ observation_space_name] = VectorObservationSpace( shape=observation_space.shape[0], low=observation_space.low, high=observation_space.high) if 'desired_goal' in state_space.keys(): self.goal_space = self.state_space['desired_goal'] # actions if type(self.env.action_space) == gym.spaces.box.Box: self.action_space = BoxActionSpace( shape=self.env.action_space.shape, low=self.env.action_space.low, high=self.env.action_space.high) elif type(self.env.action_space) == gym.spaces.discrete.Discrete: actions_description = [] if hasattr(self.env.unwrapped, 'get_action_meanings'): actions_description = self.env.unwrapped.get_action_meanings() self.action_space = DiscreteActionSpace( num_actions=self.env.action_space.n, descriptions=actions_description) if self.human_control: # TODO: add this to the action space # map keyboard keys to actions self.key_to_action = {} if hasattr(self.env.unwrapped, 'get_keys_to_action'): self.key_to_action = self.env.unwrapped.get_keys_to_action() # initialize the state by getting a new state from the environment self.reset_internal_state(True) # render if self.is_rendered: image = self.get_rendered_image() scale = 1 if self.human_control: scale = 2 if not self.native_rendering: self.renderer.create_screen(image.shape[1] * scale, image.shape[0] * scale) # measurements if self.env.spec is not None: self.timestep_limit = self.env.spec.timestep_limit else: self.timestep_limit = None # the info is only updated after the first step self.state = self.step(self.action_space.default_action).next_state self.state_space['measurements'] = VectorObservationSpace( shape=len(self.info.keys())) if self.env.spec and custom_reward_threshold is None: self.reward_success_threshold = self.env.spec.reward_threshold self.reward_space = RewardSpace( 1, reward_success_threshold=self.reward_success_threshold)
def __init__( self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters, target_success_rate: float = 1.0, seed: Union[None, int] = None, human_control: bool = False, observation_type: ObservationType = ObservationType.Measurements, custom_reward_threshold: Union[int, float] = None, **kwargs): """ :param level: (str) A string representing the control suite level to run. This can also be a LevelSelection object. For example, cartpole:swingup. :param frame_skip: (int) The number of frames to skip between any two actions given by the agent. The action will be repeated for all the skipped frames. :param visualization_parameters: (VisualizationParameters) The parameters used for visualizing the environment, such as the render flag, storing videos etc. :param target_success_rate: (float) Stop experiment if given target success rate was achieved. :param seed: (int) A seed to use for the random number generator when running the environment. :param human_control: (bool) A flag that allows controlling the environment using the keyboard keys. :param observation_type: (ObservationType) An enum which defines which observation to use. The current options are to use: * Measurements only - a vector of joint torques and similar measurements * Image only - an image of the environment as seen by a camera attached to the simulator * Measurements & Image - both type of observations will be returned in the state using the keys 'measurements' and 'pixels' respectively. :param custom_reward_threshold: (float) Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment. """ super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters, target_success_rate) self.observation_type = observation_type # load and initialize environment domain_name, task_name = self.env_id.split(":") self.env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs={'random': seed}) if observation_type != ObservationType.Measurements: self.env = pixels.Wrapper( self.env, pixels_only=observation_type == ObservationType.Image) # seed if self.seed is not None: np.random.seed(self.seed) random.seed(self.seed) self.state_space = StateSpace({}) # image observations if observation_type != ObservationType.Measurements: self.state_space['pixels'] = ImageObservationSpace( shape=self.env.observation_spec()['pixels'].shape, high=255) # measurements observations if observation_type != ObservationType.Image: measurements_space_size = 0 measurements_names = [] for observation_space_name, observation_space in self.env.observation_spec( ).items(): if len(observation_space.shape) == 0: measurements_space_size += 1 measurements_names.append(observation_space_name) elif len(observation_space.shape) == 1: measurements_space_size += observation_space.shape[0] measurements_names.extend([ "{}_{}".format(observation_space_name, i) for i in range(observation_space.shape[0]) ]) self.state_space['measurements'] = VectorObservationSpace( shape=measurements_space_size, measurements_names=measurements_names) # actions self.action_space = BoxActionSpace( shape=self.env.action_spec().shape[0], low=self.env.action_spec().minimum, high=self.env.action_spec().maximum) # initialize the state by getting a new state from the environment self.reset_internal_state(True) # render if self.is_rendered: image = self.get_rendered_image() scale = 1 if self.human_control: scale = 2 if not self.native_rendering: 
self.renderer.create_screen(image.shape[1] * scale, image.shape[0] * scale) self.target_success_rate = target_success_rate
def __init__(self, level: LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float], visualization_parameters: VisualizationParameters, server_height: int, server_width: int, camera_height: int, camera_width: int, verbose: bool, experiment_suite: ExperimentSuite, config: str, episode_max_time: int, allow_braking: bool, quality: CarlaEnvironmentParameters.Quality, cameras: List[CameraTypes], weather_id: List[int], experiment_path: str, separate_actions_for_throttle_and_brake: bool, num_speedup_steps: int, max_speed: float, **kwargs): super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters) # server configuration self.server_height = server_height self.server_width = server_width self.port = get_open_port() self.host = 'localhost' self.map_name = CarlaLevel[level.upper()].value['map_name'] self.map_path = CarlaLevel[level.upper()].value['map_path'] self.experiment_path = experiment_path # client configuration self.verbose = verbose self.quality = quality self.cameras = cameras self.weather_id = weather_id self.episode_max_time = episode_max_time self.allow_braking = allow_braking self.separate_actions_for_throttle_and_brake = separate_actions_for_throttle_and_brake self.camera_width = camera_width self.camera_height = camera_height # setup server settings self.experiment_suite = experiment_suite self.config = config if self.config: # load settings from file with open(self.config, 'r') as fp: self.settings = fp.read() else: # hard coded settings self.settings = CarlaSettings() self.settings.set(SynchronousMode=True, SendNonPlayerAgentsInfo=False, NumberOfVehicles=15, NumberOfPedestrians=30, WeatherId=random.choice( force_list(self.weather_id)), QualityLevel=self.quality.value, SeedVehicles=seed, SeedPedestrians=seed) if seed is None: self.settings.randomize_seeds() self.settings = self._add_cameras(self.settings, self.cameras, self.camera_width, self.camera_height) # open the server self.server = self._open_server() logging.disable(40) # open the client self.game = CarlaClient(self.host, self.port, timeout=99999999) self.game.connect() if self.experiment_suite: self.current_experiment_idx = 0 self.current_experiment = self.experiment_suite.get_experiments()[ self.current_experiment_idx] self.scene = self.game.load_settings( self.current_experiment.conditions) else: self.scene = self.game.load_settings(self.settings) # get available start positions self.positions = self.scene.player_start_spots self.num_positions = len(self.positions) self.current_start_position_idx = 0 self.current_pose = 0 # state space self.state_space = StateSpace({ "measurements": VectorObservationSpace( 4, measurements_names=["forward_speed", "x", "y", "z"]) }) for camera in self.scene.sensors: self.state_space[camera.name] = ImageObservationSpace( shape=np.array([self.camera_height, self.camera_width, 3]), high=255) # action space if self.separate_actions_for_throttle_and_brake: self.action_space = BoxActionSpace( shape=3, low=np.array([-1, 0, 0]), high=np.array([1, 1, 1]), descriptions=["steer", "gas", "brake"]) else: self.action_space = BoxActionSpace( shape=2, low=np.array([-1, -1]), high=np.array([1, 1]), descriptions=["steer", "gas_and_brake"]) # human control if self.human_control: # convert continuous action space to discrete self.steering_strength = 0.5 self.gas_strength = 1.0 self.brake_strength = 0.5 # TODO: reverse order of actions self.action_space = PartialDiscreteActionSpaceMap( target_actions=[[0., 0.], 
[0., -self.steering_strength], [0., self.steering_strength], [self.gas_strength, 0.], [-self.brake_strength, 0], [self.gas_strength, -self.steering_strength], [self.gas_strength, self.steering_strength], [self.brake_strength, -self.steering_strength], [self.brake_strength, self.steering_strength]], descriptions=[ 'NO-OP', 'TURN_LEFT', 'TURN_RIGHT', 'GAS', 'BRAKE', 'GAS_AND_TURN_LEFT', 'GAS_AND_TURN_RIGHT', 'BRAKE_AND_TURN_LEFT', 'BRAKE_AND_TURN_RIGHT' ]) # map keyboard keys to actions for idx, action in enumerate(self.action_space.descriptions): for key in key_map.keys(): if action == key: self.key_to_action[key_map[key]] = idx self.num_speedup_steps = num_speedup_steps self.max_speed = max_speed # measurements self.autopilot = None self.planner = Planner(self.map_name) # env initialization self.reset_internal_state(True) # render if self.is_rendered: image = self.get_rendered_image() self.renderer.create_screen(image.shape[1], image.shape[0])
class RobosuiteEnvironment(Environment): def __init__(self, level: LevelSelection, seed: int, frame_skip: int, human_control: bool, custom_reward_threshold: Union[int, float, None], visualization_parameters: VisualizationParameters, base_parameters: RobosuiteBaseParameters, extra_parameters: Dict[str, Any], robot: str, controller: str, target_success_rate: float = 1.0, apply_dr: bool = False, dr_every_n_steps_min: int = 10, dr_every_n_steps_max: int = 20, use_joint_vel_obs=False, custom_controller_config_fpath=None, **kwargs): super(RobosuiteEnvironment, self).__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters, target_success_rate) # Validate arguments self.frame_skip = max(1, self.frame_skip) def validate_input(input, supported, name): if input not in supported: raise ValueError("Unknown Robosuite {0} passed: '{1}' ; Supported {0}s are: {2}".format( name, input, ' | '.join(supported) )) validate_input(self.env_id, robosuite_environments, 'environment') validate_input(robot, robosuite_robots, 'robot') self.robot = robot if controller is not None: validate_input(controller, robosuite_controllers, 'controller') self.controller = controller self.base_parameters = base_parameters self.base_parameters.has_renderer = self.is_rendered and self.native_rendering self.base_parameters.has_offscreen_renderer = self.base_parameters.use_camera_obs or (self.is_rendered and not self.native_rendering) # Seed if self.seed is not None: np.random.seed(self.seed) random.seed(self.seed) # Load and initialize environment env_args = self.base_parameters.env_kwargs_dict() env_args.update(extra_parameters) if 'reward_scale' not in env_args and self.env_id in DEFAULT_REWARD_SCALES: env_args['reward_scale'] = DEFAULT_REWARD_SCALES[self.env_id] env_args['robots'] = self.robot controller_cfg = None if self.controller is not None: controller_cfg = robosuite.controllers.load_controller_config(default_controller=self.controller) elif custom_controller_config_fpath is not None: controller_cfg = robosuite.controllers.load_controller_config(custom_fpath=custom_controller_config_fpath) env_args['controller_configs'] = controller_cfg self.env = robosuite.make(self.env_id, **env_args) # TODO: Generalize this to filter any observation by name if not use_joint_vel_obs: self.env.modify_observable('robot0_joint_vel', 'active', False) # Wrap with a dummy wrapper so we get a consistent API (there are subtle changes between # wrappers and actual environments in Robosuite, for example action_spec as property vs. function) self.env = Wrapper(self.env) if apply_dr: self.env = DomainRandomizationWrapper(self.env, seed=self.seed, randomize_every_n_steps_min=dr_every_n_steps_min, randomize_every_n_steps_max=dr_every_n_steps_max) # State space self.state_space = self._setup_state_space() # Action space low, high = self.env.unwrapped.action_spec self.action_space = BoxActionSpace(low.shape, low=low, high=high) self.reset_internal_state() if self.is_rendered: image = self.get_rendered_image() self.renderer.create_screen(image.shape[1], image.shape[0]) # TODO: Other environments call rendering here, why? 
reset_internal_state does it def _setup_state_space(self): state_space = StateSpace({}) dummy_obs = self._process_observation(self.env.observation_spec()) state_space['measurements'] = VectorObservationSpace(dummy_obs['measurements'].shape[0]) if self.base_parameters.use_camera_obs: state_space['camera'] = PlanarMapsObservationSpace(dummy_obs['camera'].shape, 0, 255) return state_space def _process_observation(self, raw_obs): new_obs = {} # TODO: Support multiple cameras, this assumes a single camera camera_name = self.base_parameters.camera_names camera_obs = raw_obs.get(camera_name + '_image', None) if camera_obs is not None: depth_obs = raw_obs.get(camera_name + '_depth', None) if depth_obs is not None: depth_obs = np.expand_dims(depth_obs, axis=2) camera_obs = np.concatenate([camera_obs, depth_obs], axis=2) new_obs['camera'] = camera_obs measurements = raw_obs['robot0_proprio-state'] object_obs = raw_obs.get('object-state', None) if object_obs is not None: measurements = np.concatenate([measurements, object_obs]) new_obs['measurements'] = measurements return new_obs def _take_action(self, action): action = self.action_space.clip_action_to_space(action) # We mimic the "action_repeat" mechanism of RobosuiteWrapper in Surreal. # Same concept as frame_skip, only returning the average reward across repeated actions instead # of the total reward. rewards = [] for _ in range(self.frame_skip): obs, reward, done, info = self.env.step(action) rewards.append(reward) if done: break reward = np.mean(rewards) self.last_result = RobosuiteStepResult(obs, reward, done, info) def _update_state(self): obs = self._process_observation(self.last_result.observation) self.state = {k: obs[k] for k in self.state_space.sub_spaces} self.reward = self.last_result.reward or 0 self.done = self.last_result.done self.info = self.last_result.info def _restart_environment_episode(self, force_environment_reset=False): reset_obs = self.env.reset() self.last_result = RobosuiteStepResult(reset_obs, 0.0, False, {}) def _render(self): self.env.render() def get_rendered_image(self): img: np.ndarray = self.env.sim.render(camera_name=self.base_parameters.render_camera, height=512, width=512, depth=False) return np.flip(img, 0) def close(self): self.env.close()
def test_get_action(): action_space = BoxActionSpace(np.array([10]), -1, 1) policy = OUProcess(action_space, mu=0, theta=0.1, sigma=0.2, dt=0.01) # make sure no noise is added in the testing phase policy.change_phase(RunPhase.TEST) assert np.all(policy.get_action(np.zeros((10, ))) == np.zeros((10, ))) rand_action = np.random.rand(10) assert np.all(policy.get_action(rand_action) == rand_action) # make sure the noise added in the training phase matches the golden policy.change_phase(RunPhase.TRAIN) np.random.seed(0) targets = [ [ 0.03528105, 0.00800314, 0.01957476, 0.04481786, 0.03735116, -0.01954556, 0.01900177, -0.00302714, -0.00206438, 0.00821197 ], [ 0.03812664, 0.03708061, 0.03477594, 0.04720655, 0.04619107, -0.01285253, 0.04886435, -0.00712728, 0.00419904, -0.00887816 ], [ -0.01297129, 0.0501159, 0.05202989, 0.03231604, 0.09153997, -0.04192699, 0.04973065, -0.01086383, 0.03485043, 0.0205179 ], [ -0.00985937, 0.05762904, 0.03422214, -0.00733221, 0.08449019, -0.03875808, 0.07428674, 0.01319463, 0.02706904, 0.01445132 ], [ -3.08205658e-02, 2.91710492e-02, 6.25166679e-05, 3.16906342e-02, 7.42126579e-02, -4.74808080e-02, 4.91565431e-02, 2.87312413e-02, -5.23598615e-03, 1.01820670e-02 ], [ -0.04869908, 0.03687993, -0.01015365, 0.0080463, 0.0735748, -0.03886669, 0.05043773, 0.03475195, -0.01791719, 0.00291706 ], [ -0.06209959, 0.02965198, -0.02640642, -0.0264874, 0.07704975, -0.04686344, 0.01778333, 0.04397284, -0.03604524, 0.00395305 ], [ -0.04745568, 0.03220199, -0.003592, -0.05115743, 0.08501953, -0.06051278, 0.0003496, 0.03235188, -0.04224025, 0.00507241 ], [ -0.07071122, 0.05018632, 0.00572484, -0.08183114, 0.11469956, -0.02253448, 0.02392484, 0.02872103, -0.06361306, 0.02615637 ], [ -0.07870404, 0.07458503, 0.00988462, -0.06221653, 0.12171218, -0.00838049, 0.02411092, 0.06440972, -0.0610112, 0.03417 ], [ -0.04096233, 0.04755527, -0.01553497, -0.04276638, 0.098128, 0.03050032, 0.01581443, 0.04939621, -0.02249135, 0.06374613 ], [ -0.00357018, 0.06562861, -0.03274395, -0.00452232, 0.09266981, 0.04651895, 0.03474365, 0.04624661, -0.01018727, 0.08212651 ], ] for i in range(10): current_noise = policy.get_action(np.zeros((10, ))) assert np.all(np.abs(current_noise - targets[i]) < 1e-7) # get some statistics. check very roughly that the mean acts according to the definition of the policy # mean of 0 vals = [] for i in range(50000): current_noise = policy.get_action(np.zeros((10, ))) vals.append(current_noise) assert np.all(np.abs(np.mean(vals, axis=0)) < 1) # mean of 10 policy = OUProcess(action_space, mu=10, theta=0.1, sigma=0.2, dt=0.01) policy.change_phase(RunPhase.TRAIN) vals = [] for i in range(50000): current_noise = policy.get_action(np.zeros((10, ))) vals.append(current_noise) assert np.all(np.abs(np.mean(vals, axis=0) - 10) < 1)
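# The "golden" target values in the test above follow the standard discretised
# Ornstein-Uhlenbeck update (a sketch consistent with the targets, assuming the internal
# state starts at zero):
#   x_next = x + theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
# e.g. with mu=0, theta=0.1, sigma=0.2, dt=0.01 and np.random.seed(0), the first noise
# sample is 0 + 0.1 * (0 - 0) * 0.01 + 0.2 * 0.1 * 1.7640523 ~= 0.0352810, matching targets[0][0].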
def __init__(self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters, target_success_rate: float = 1.0, seed: Union[None, int] = None, human_control: bool = False, custom_reward_threshold: Union[int, float] = None, screen_size: int = 84, minimap_size: int = 64, feature_minimap_maps_to_use: List = range(7), feature_screen_maps_to_use: List = range(17), observation_type: StarcraftObservationType = StarcraftObservationType.Features, disable_fog: bool = False, auto_select_all_army: bool = True, use_full_action_space: bool = False, **kwargs): super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters, target_success_rate) self.screen_size = screen_size self.minimap_size = minimap_size self.feature_minimap_maps_to_use = feature_minimap_maps_to_use self.feature_screen_maps_to_use = feature_screen_maps_to_use self.observation_type = observation_type self.features_screen_size = None self.feature_minimap_size = None self.rgb_screen_size = None self.rgb_minimap_size = None if self.observation_type == StarcraftObservationType.Features: self.features_screen_size = screen_size self.feature_minimap_size = minimap_size elif self.observation_type == StarcraftObservationType.RGB: self.rgb_screen_size = screen_size self.rgb_minimap_size = minimap_size self.disable_fog = disable_fog self.auto_select_all_army = auto_select_all_army self.use_full_action_space = use_full_action_space # step_mul is the equivalent to frame skipping. Not sure if it repeats actions in between or not though. self.env = sc2_env.SC2Env( map_name=self.env_id, step_mul=frame_skip, visualize=self.is_rendered, agent_interface_format=sc2_env.AgentInterfaceFormat( feature_dimensions=sc2_env.Dimensions( screen=self.features_screen_size, minimap=self.feature_minimap_size) # rgb_dimensions=sc2_env.Dimensions( # screen=self.rgb_screen_size, # minimap=self.rgb_screen_size # ) ), # feature_screen_size=self.features_screen_size, # feature_minimap_size=self.feature_minimap_size, # rgb_screen_size=self.rgb_screen_size, # rgb_minimap_size=self.rgb_screen_size, disable_fog=disable_fog, random_seed=self.seed) # print all the available actions # self.env = available_actions_printer.AvailableActionsPrinter(self.env) self.reset_internal_state(True) """ feature_screen: [height_map, visibility_map, creep, power, player_id, player_relative, unit_type, selected, unit_hit_points, unit_hit_points_ratio, unit_energy, unit_energy_ratio, unit_shields, unit_shields_ratio, unit_density, unit_density_aa, effects] feature_minimap: [height_map, visibility_map, creep, camera, player_id, player_relative, selecte d] player: [player_id, minerals, vespene, food_cap, food_army, food_workers, idle_worker_dount, army_count, warp_gate_count, larva_count] """ self.screen_shape = np.array( self.env.observation_spec()[0]['feature_screen']) self.screen_shape[0] = len(self.feature_screen_maps_to_use) self.minimap_shape = np.array( self.env.observation_spec()[0]['feature_minimap']) self.minimap_shape[0] = len(self.feature_minimap_maps_to_use) self.state_space = StateSpace({ "screen": PlanarMapsObservationSpace(shape=self.screen_shape, low=0, high=255, channels_axis=0), "minimap": PlanarMapsObservationSpace(shape=self.minimap_shape, low=0, high=255, channels_axis=0), "measurements": VectorObservationSpace(self.env.observation_spec()[0]["player"][0]) }) if self.use_full_action_space: action_identifiers = list(self.env.action_spec()[0].functions) num_action_identifiers = len(action_identifiers) 
action_arguments = [(arg.name, arg.sizes) for arg in self.env.action_spec()[0].types] sub_action_spaces = [DiscreteActionSpace(num_action_identifiers)] for argument in action_arguments: for dimension in argument[1]: sub_action_spaces.append(DiscreteActionSpace(dimension)) self.action_space = CompoundActionSpace(sub_action_spaces) else: self.action_space = BoxActionSpace(2, 0, self.screen_size - 1, ["X-Axis, Y-Axis"], default_action=np.array([ self.screen_size / 2, self.screen_size / 2 ])) self.target_success_rate = target_success_rate
def __init__(self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters, target_success_rate: float = 1.0, additional_simulator_parameters: Dict[str, Any] = {}, seed: Union[None, int] = None, human_control: bool = False, custom_reward_threshold: Union[int, float] = None, random_initialization_steps: int = 1, max_over_num_frames: int = 1, observation_space_type: ObservationSpaceType = None, **kwargs): """ :param level: (str) A string representing the gym level to run. This can also be a LevelSelection object. For example, BreakoutDeterministic-v0 :param frame_skip: (int) The number of frames to skip between any two actions given by the agent. The action will be repeated for all the skipped frames. :param visualization_parameters: (VisualizationParameters) The parameters used for visualizing the environment, such as the render flag, storing videos etc. :param additional_simulator_parameters: (Dict[str, Any]) Any additional parameters that the user can pass to the Gym environment. These parameters should be accepted by the __init__ function of the implemented Gym environment. :param seed: (int) A seed to use for the random number generator when running the environment. :param human_control: (bool) A flag that allows controlling the environment using the keyboard keys. :param custom_reward_threshold: (float) Allows defining a custom reward that will be used to decide when the agent succeeded in passing the environment. If not set, this value will be taken from the Gym environment definition. :param random_initialization_steps: (int) The number of random steps that will be taken in the environment after each reset. This is a feature presented in the DQN paper, which improves the variability of the episodes the agent sees. :param max_over_num_frames: (int) This value will be used for merging multiple frames into a single frame by taking the maximum value for each of the pixels in the frame. This is particularly used in Atari games, where the frames flicker, and objects can be seen in one frame but disappear in the next. :param observation_space_type: This value will be used for generating observation space. Allows a custom space. Should be one of ObservationSpaceType. If not specified, observation space is inferred from the number of dimensions of the observation: 1D: Vector space, 3D: Image space if 1 or 3 channels, PlanarMaps space otherwise. """ super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters, target_success_rate) self.random_initialization_steps = random_initialization_steps self.max_over_num_frames = max_over_num_frames self.additional_simulator_parameters = additional_simulator_parameters # hide warnings gym.logger.set_level(40) """ load and initialize environment environment ids can be defined in 3 ways: 1. Native gym environments like BreakoutDeterministic-v0 for example 2. Custom gym environments written and installed as python packages. This environments should have a python module with a class inheriting gym.Env, implementing the relevant functions (_reset, _step, _render) and defining the observation and action space For example: my_environment_package:MyEnvironmentClass will run an environment defined in the MyEnvironmentClass class 3. Custom gym environments written as an independent module which is not installed. 
This environments should have a python module with a class inheriting gym.Env, implementing the relevant functions (_reset, _step, _render) and defining the observation and action space. For example: path_to_my_environment.sub_directory.my_module:MyEnvironmentClass will run an environment defined in the MyEnvironmentClass class which is located in the module in the relative path path_to_my_environment.sub_directory.my_module """ if ':' in self.env_id: # custom environments if '/' in self.env_id or '.' in self.env_id: # environment in a an absolute path module written as a unix path or in a relative path module # written as a python import path env_class = short_dynamic_import(self.env_id) else: # environment in a python package env_class = gym.envs.registration.load(self.env_id) # instantiate the environment try: self.env = env_class(**self.additional_simulator_parameters) except: screen.error( "Failed to instantiate Gym environment class %s with arguments %s" % (env_class, self.additional_simulator_parameters), crash=False) raise else: self.env = gym.make(self.env_id) # for classic control we want to use the native renderer because otherwise we will get 2 renderer windows environment_to_always_use_with_native_rendering = [ 'classic_control', 'mujoco', 'robotics' ] self.native_rendering = self.native_rendering or \ any([env in str(self.env.unwrapped.__class__) for env in environment_to_always_use_with_native_rendering]) if self.native_rendering: if hasattr(self, 'renderer'): self.renderer.close() # seed if self.seed is not None: self.env.seed(self.seed) np.random.seed(self.seed) random.seed(self.seed) # frame skip and max between consecutive frames self.is_mujoco_env = 'mujoco' in str(self.env.unwrapped.__class__) self.is_roboschool_env = 'roboschool' in str( self.env.unwrapped.__class__) self.is_atari_env = 'Atari' in str(self.env.unwrapped.__class__) if self.is_atari_env: self.env.unwrapped.frameskip = 1 # this accesses the atari env that is wrapped with a timelimit wrapper env if self.env_id == "SpaceInvadersDeterministic-v4" and self.frame_skip == 4: screen.warning( "Warning: The frame-skip for Space Invaders was automatically updated from 4 to 3. " "This is following the DQN paper where it was noticed that a frame-skip of 3 makes the " "laser rays disappear. To force frame-skip of 4, please use SpaceInvadersNoFrameskip-v4." 
) self.frame_skip = 3 self.env = MaxOverFramesAndFrameskipEnvWrapper( self.env, frameskip=self.frame_skip, max_over_num_frames=self.max_over_num_frames) else: self.env.unwrapped.frameskip = self.frame_skip self.state_space = StateSpace({}) # observations if not isinstance(self.env.observation_space, gym.spaces.dict.Dict): state_space = {'observation': self.env.observation_space} else: state_space = self.env.observation_space.spaces for observation_space_name, observation_space in state_space.items(): if observation_space_type == ObservationSpaceType.Tensor: # we consider arbitrary input tensor which does not necessarily represent images self.state_space[ observation_space_name] = TensorObservationSpace( shape=np.array(observation_space.shape), low=observation_space.low, high=observation_space.high) elif observation_space_type == ObservationSpaceType.Image or len( observation_space.shape) == 3: # we assume gym has image observations (with arbitrary number of channels) where their values are # within 0-255, and where the channel dimension is the last dimension if observation_space.shape[-1] in [1, 3]: self.state_space[ observation_space_name] = ImageObservationSpace( shape=np.array(observation_space.shape), high=255, channels_axis=-1) else: # For any number of channels other than 1 or 3, use the generic PlanarMaps space self.state_space[ observation_space_name] = PlanarMapsObservationSpace( shape=np.array(observation_space.shape), low=0, high=255, channels_axis=-1) elif observation_space_type == ObservationSpaceType.Vector or len( observation_space.shape) == 1: self.state_space[ observation_space_name] = VectorObservationSpace( shape=observation_space.shape[0], low=observation_space.low, high=observation_space.high) else: raise screen.error( "Failed to instantiate Gym environment class %s with observation space type %s" % (env_class, observation_space_type), crash=True) if 'desired_goal' in state_space.keys(): self.goal_space = self.state_space['desired_goal'] # actions if type(self.env.action_space) == gym.spaces.box.Box: self.action_space = BoxActionSpace( shape=self.env.action_space.shape, low=self.env.action_space.low, high=self.env.action_space.high) elif type(self.env.action_space) == gym.spaces.discrete.Discrete: actions_description = [] if hasattr(self.env.unwrapped, 'get_action_meanings'): actions_description = self.env.unwrapped.get_action_meanings() self.action_space = DiscreteActionSpace( num_actions=self.env.action_space.n, descriptions=actions_description) else: raise screen.error(( "Failed to instantiate gym environment class {} due to unsupported " "action space {}. Expected BoxActionSpace or DiscreteActionSpace." ).format(env_class, self.env.action_space), crash=True) if self.human_control: # TODO: add this to the action space # map keyboard keys to actions self.key_to_action = {} if hasattr(self.env.unwrapped, 'get_keys_to_action'): self.key_to_action = self.env.unwrapped.get_keys_to_action() else: screen.error( "Error: Environment {} does not support human control.". 
format(self.env), crash=True) # initialize the state by getting a new state from the environment self.reset_internal_state(True) # render if self.is_rendered: image = self.get_rendered_image() scale = 1 if self.human_control: scale = 2 if not self.native_rendering: self.renderer.create_screen(image.shape[1] * scale, image.shape[0] * scale) # the info is only updated after the first step self.state = self.step(self.action_space.default_action).next_state self.state_space['measurements'] = VectorObservationSpace( shape=len(self.info.keys())) if self.env.spec and custom_reward_threshold is None: self.reward_success_threshold = self.env.spec.reward_threshold self.reward_space = RewardSpace( 1, reward_success_threshold=self.reward_success_threshold) self.target_success_rate = target_success_rate
class ControlSuiteEnvironment(Environment): def __init__( self, level: LevelSelection, frame_skip: int, visualization_parameters: VisualizationParameters, seed: Union[None, int] = None, human_control: bool = False, observation_type: ObservationType = ObservationType.Measurements, custom_reward_threshold: Union[int, float] = None, **kwargs): super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters) self.observation_type = observation_type # load and initialize environment domain_name, task_name = self.env_id.split(":") self.env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs={'random': seed}) if observation_type != ObservationType.Measurements: self.env = pixels.Wrapper( self.env, pixels_only=observation_type == ObservationType.Image) # seed if self.seed is not None: np.random.seed(self.seed) random.seed(self.seed) self.state_space = StateSpace({}) # image observations if observation_type != ObservationType.Measurements: self.state_space['pixels'] = ImageObservationSpace( shape=self.env.observation_spec()['pixels'].shape, high=255) # measurements observations if observation_type != ObservationType.Image: measurements_space_size = 0 measurements_names = [] for observation_space_name, observation_space in self.env.observation_spec( ).items(): if len(observation_space.shape) == 0: measurements_space_size += 1 measurements_names.append(observation_space_name) elif len(observation_space.shape) == 1: measurements_space_size += observation_space.shape[0] measurements_names.extend([ "{}_{}".format(observation_space_name, i) for i in range(observation_space.shape[0]) ]) self.state_space['measurements'] = VectorObservationSpace( shape=measurements_space_size, measurements_names=measurements_names) # actions self.action_space = BoxActionSpace( shape=self.env.action_spec().shape[0], low=self.env.action_spec().minimum, high=self.env.action_spec().maximum) # initialize the state by getting a new state from the environment self.reset_internal_state(True) # render if self.is_rendered: image = self.get_rendered_image() scale = 1 if self.human_control: scale = 2 if not self.native_rendering: self.renderer.create_screen(image.shape[1] * scale, image.shape[0] * scale) def _update_state(self): self.state = {} if self.observation_type != ObservationType.Measurements: self.pixels = self.last_result.observation['pixels'] self.state['pixels'] = self.pixels if self.observation_type != ObservationType.Image: self.measurements = np.array([]) for sub_observation in self.last_result.observation.values(): if isinstance(sub_observation, np.ndarray) and len( sub_observation.shape) == 1: self.measurements = np.concatenate( (self.measurements, sub_observation)) else: self.measurements = np.concatenate( (self.measurements, np.array([sub_observation]))) self.state['measurements'] = self.measurements self.reward = self.last_result.reward if self.last_result.reward is not None else 0 self.done = self.last_result.last() def _take_action(self, action): if type(self.action_space) == BoxActionSpace: action = self.action_space.clip_action_to_space(action) self.last_result = self.env.step(action) def _restart_environment_episode(self, force_environment_reset=False): self.last_result = self.env.reset() def _render(self): pass def get_rendered_image(self): return self.env.physics.render(camera_id=0)