def __init__(self,
             room=None,  # Specify either room or room_type
             room_type='empty',  # Choose from ['empty', 'wall', 'rooms']
             potential_type="euclidean",  # Choose from ['none' (no shaping) ,'shaped' (shortest distance between COMs), 'euclidean' (euclidean distance between states)]
             base_reward='positive',  # Choose from ['positive'(0,1), 'negative' (-1,0)]
             shaped=False,
             speed=1,
             *args, **kwargs):
    """Point-mass room environment with an adjustable simulation speed.

    ``speed`` rescales the simulation by inflating ``frame_skip``;
    because ``frame_skip`` must be an integer it is rounded up, and
    ``self.modifier`` keeps the exact residual ratio so that
    ``old_frame_skip * speed == new_frame_skip * modifier``.
    """
    Serializable.quick_init(self, locals())
    self.use_images = False
    if room is None:
        # Construct only the requested default room: the original code
        # eagerly built every variant even when an explicit `room` was
        # passed in, doing needless construction work.
        room_factories = {
            'empty': lambda: room_world.rooms.Room('pm', 1.2, 1.2),
            'wall': lambda: room_world.rooms.RoomWithWall('pm', 1.2, 1.2),
            'rooms': lambda: room_world.rooms.FourRoom('pm', 1.2, 1.2),
        }
        room = room_factories[room_type]()
    super().__init__(room=room,
                     potential_type=potential_type,
                     shaped=shaped,
                     base_reward=base_reward,
                     *args, **kwargs)
    # Round the scaled frame skip up to an integer and remember the
    # exact leftover factor in `modifier`.
    new_frame_skip = int(np.ceil(self.frame_skip * speed))
    self.modifier = self.frame_skip * speed / new_frame_skip
    self.frame_skip = new_frame_skip
def __init__(self,
             room=None,  # Specify either room or room_type
             room_type='empty',  # Choose from ['empty', 'wall', 'rooms', 'long']
             potential_type="euclidean",  # Choose from ['none' (no shaping) ,'shaped' (shortest distance between COMs), 'euclidean' (euclidean distance between states)]
             shaped=False,
             *args,
             base_reward='com',  # keyword-only; default preserves old hard-coded value
             **kwargs):
    """Point-mass room environment using a center-of-mass based reward.

    ``base_reward`` was previously hard-coded to ``'com'``; it is now a
    keyword-only parameter (placed after ``*args`` so existing positional
    callers are unaffected) with the same default.
    """
    Serializable.quick_init(self, locals())
    self.use_images = False
    if room is None:
        # Construct only the requested default room instead of eagerly
        # building every variant when an explicit `room` is supplied.
        room_factories = {
            'empty': lambda: room_world.rooms.Room('pm', 1.2, 1.2),
            'wall': lambda: room_world.rooms.RoomWithWall('pm', 1.2, 1.2),
            'rooms': lambda: room_world.rooms.FourRoom('pm', 1.2, 1.2),
            'long': lambda: room_world.rooms.Room('pm', 0.3, 7.2),
        }
        room = room_factories[room_type]()
    super().__init__(room=room,
                     potential_type=potential_type,
                     shaped=shaped,
                     base_reward=base_reward,
                     *args, **kwargs)
def __init__(
        self,
        env,
        obs_keys=None,
        goal_keys=None,
        append_goal_to_obs=True,
):
    """Flatten selected keys of a Dict observation space into one Box.

    Args:
        env: environment whose ``observation_space`` is a gym ``Dict``.
        obs_keys: keys to concatenate into the flat observation
            (defaults to ``['observation']``).
        goal_keys: goal keys (defaults to ``['desired_goal']``).
        append_goal_to_obs: if True, the goal keys are appended to the
            observation keys before flattening.
    """
    Serializable.quick_init(self, locals())
    self.env = env
    if obs_keys is None:
        obs_keys = ['observation']
    if goal_keys is None:
        goal_keys = ['desired_goal']
    if append_goal_to_obs:
        # Build a new list: the original `obs_keys += goal_keys`
        # mutated a caller-supplied list in place.
        obs_keys = obs_keys + goal_keys
    # Check the space type first; the per-key check below already
    # assumes `.spaces` exists.
    assert isinstance(self.env.observation_space, Dict)
    for k in obs_keys:
        assert k in self.env.observation_space.spaces
    self.obs_keys = obs_keys
    self.goal_keys = goal_keys
    # TODO: handle nested dict
    self.observation_space = Box(
        np.hstack([
            self.env.observation_space.spaces[k].low
            for k in obs_keys
        ]),
        np.hstack([
            self.env.observation_space.spaces[k].high
            for k in obs_keys
        ]),
    )
    self.action_space = self.env.action_space
def __init__(self, env, initial_configurations, domain='obj_pos'):
    """Wrapper that resets the env to entries of a fixed configuration list.

    Requires the wrapped environment to have ``random_init`` disabled.
    """
    assert not env.random_init, (
        "Please use this wrapper when setting environment's random_init as False!!")
    Serializable.quick_init(self, locals())
    super().__init__(env)
    self._initial_configs = initial_configurations
    self._n_initial_configs = len(self._initial_configs)
    self._current_idx = 0
    self._domain = domain
def __init__(self, **kwargs):
    """Initialize the reaching env and expose a goal-conditioned Dict space."""
    Serializable.quick_init(self, locals())
    sawyer_reaching.SawyerReachXYZEnv.__init__(self, **kwargs)
    # Alias the base spaces, then replace observation_space with the
    # standard goal-conditioned dictionary layout.
    obs_space = self.observation_space
    goal_space = self.goal_space
    self.observation_space = Dict([
        ('observation', obs_space),
        ('desired_goal', goal_space),
        ('achieved_goal', goal_space),
        ('state_observation', obs_space),
        ('state_desired_goal', goal_space),
        ('state_achieved_goal', goal_space),
    ])
def __init__(self, **kwargs):
    """Initialize the base env and expose a goal-conditioned Dict space."""
    Serializable.quick_init(self, locals())
    # Bug fix: the original called `super().__init__(self, **kwargs)`,
    # passing `self` twice (once via the bound call, once explicitly),
    # which shifted `self` into the parent's first positional parameter.
    super().__init__(**kwargs)
    self.observation_space = Dict([
        ('observation', self.observation_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.observation_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ])
    self.reset()
def __init__(self, base='pm', length=1.2, width=1.2, start=None, target=None):
    """Room with default start/target positions derived from its size."""
    Serializable.quick_init(self, locals())
    default_start = np.array((-length / 6, -width / 4))
    # NOTE(review): the default target uses width/6 where the start uses
    # length/6 — possibly an intentional asymmetry; confirm with authors.
    default_target = np.array((-width / 6, width / 4))
    start = default_start if start is None else start
    target = default_target if target is None else target
    super().__init__(base, length, width, start, target)
def __init__(self, door_open_epsilon=2, **kwargs):
    """Initialize the door env with a goal-conditioned Dict space.

    Args:
        door_open_epsilon: tolerance stored for door-open checks.
    """
    # Set before quick_init so it is captured in locals() for serialization.
    self.door_open_epsilon = door_open_epsilon
    Serializable.quick_init(self, locals())
    sawyer_door.SawyerDoorEnv.__init__(self, **kwargs)
    space_entries = [
        ('observation', self.observation_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.observation_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ]
    self.observation_space = Dict(space_entries)
    self.set_mode('eval')
def __init__(
        self,
        env,
        obs_means=None,
        obs_stds=None,
        obs_to_normalize_keys=None,
):
    """Wrap `env`, normalizing the given observation keys by mean/std.

    Args:
        env: environment to wrap.
        obs_means: optional {key: mean-array} dict; defaults to zeros.
        obs_stds: optional {key: std-array} dict; defaults to zeros.
        obs_to_normalize_keys: observation keys to normalize
            (defaults to ``['observation']``).
    """
    # self._wrapped_env needs to be called first because
    # Serializable.quick_init calls getattr, on this class. And the
    # implementation of getattr (see below) calls self._wrapped_env.
    # Without setting this first, the call to self._wrapped_env would call
    # getattr again (since it's not set yet) and therefore loop forever.
    # Or else serialization gets delegated to the wrapped_env. Serialize
    # this env separately from the wrapped_env.
    self._wrapped_env = env
    Serializable.quick_init(self, locals())
    ProxyEnv.__init__(self, env)
    # Bug fixes vs the original:
    # 1) `obs_to_normalize_keys=['observation']` was a mutable default
    #    argument; use a None sentinel instead.
    # 2) self.obs_to_normalize_keys was read in the loops below *before*
    #    it was assigned (the assignment was the last statement), so the
    #    reads fell through to __getattr__ / the wrapped env.
    if obs_to_normalize_keys is None:
        obs_to_normalize_keys = ['observation']
    self.obs_to_normalize_keys = obs_to_normalize_keys
    self._should_normalize = not (obs_means is None and obs_stds is None)

    def _stat_dict(provided):
        # Build {key: array} statistics; default to zeros matching each
        # observation sub-space when the statistic was not supplied.
        # NOTE(review): a zero default for obs_stds looks like it would
        # divide by zero during normalization — confirm downstream usage.
        if provided is None:
            return {
                key: np.zeros_like(env.observation_space[key].low)
                for key in obs_to_normalize_keys
            }
        return {
            key: np.array(provided[key])
            for key in obs_to_normalize_keys
        }

    if self._should_normalize:
        obs_means = _stat_dict(obs_means)
        obs_stds = _stat_dict(obs_stds)
    self._obs_means = obs_means
    self._obs_stds = obs_stds
    ub = np.ones(self._wrapped_env.action_space.shape)
    self.action_space = Box(-1 * ub, ub)
def __init__(self,
             # Room
             room,
             # Start and Goal
             start_config="all",
             goal_config="all",
             # Reward
             potential_type='',
             shaped=False,
             base_reward='',
             # State and Goal Representations
             use_state_images=False,
             use_goal_images=False,
             image_resolution=64,
             # Time Limits
             max_path_length=None,
             *args, **kwargs
             ):
    """Multitask MuJoCo room environment.

    Builds the MuJoCo model from ``room``, sets up start/goal baselines,
    time limits, reward configuration, and the (possibly image-based)
    observation and goal spaces, then resets once.
    """
    # Initialize
    Serializable.quick_init(self, locals())
    MultitaskEnv.__init__(self)

    # Environment Configuration
    self._room = room
    # `self.room` is presumably a property exposing self._room
    # (defined elsewhere in the class) — TODO confirm.
    model = self.room.get_mjcmodel()
    self.possible_positions = self.room.XY(n=50)

    # The generated model is written to a temp file for MujocoEnv to load.
    with model.asfile() as f:
        MujocoEnv.__init__(self, f.name, frame_skip=self.FRAME_SKIP)

    # Initialization
    self.start_config = start_config
    self.baseline_start = self.room.get_start()
    self.start = np.zeros_like(self.baseline_start)

    self.goal_config = goal_config
    self.baseline_goal = self.room.get_target()
    self.goal = np.zeros_like(self.baseline_goal)

    # Time Limit: fall back to the class-level default when unspecified.
    self.curr_path_length = 0
    if max_path_length is None:
        self.max_path_length = self.MAX_PATH_LENGTH
    else:
        self.max_path_length = max_path_length

    # Reward Functions
    self.potential_type = potential_type
    self.shaped = shaped
    self.base_reward = base_reward

    # Action Space: bounded by the actuator control ranges of the model.
    bounds = self.model.actuator_ctrlrange.copy()
    self.action_space = Box(low=bounds[:, 0], high=bounds[:, 1])

    self.use_state_images = use_state_images
    self.use_goal_images = use_goal_images
    self.image_resolution = image_resolution

    # Observation Space: optionally rendered top-view images instead of
    # the raw env observation.
    example_state_obs = self._get_env_obs()
    if self.use_state_images:
        example_obs = self.get_image(self.image_resolution, self.image_resolution, camera_name='topview')
    else:
        example_obs = example_state_obs
    # NOTE(review): `state_obs_shape` is taken from `example_obs`, not
    # `example_state_obs` — asymmetric with the goal section below where
    # `state_goal_shape` comes from `example_state_goal`. Possibly a bug
    # when use_state_images=True; confirm intent before changing.
    state_obs_shape = example_obs.shape
    obs_shape = example_obs.shape
    self.obs_space = Box(-1 * np.ones(obs_shape),np.ones(obs_shape))
    self.state_obs_space = Box(-1 * np.ones(state_obs_shape), np.ones(state_obs_shape))

    # Goal Space: same image-vs-state choice for the goal representation.
    example_state_goal = self._get_env_achieved_goal(example_state_obs)
    if self.use_goal_images:
        example_goal = self.get_image(self.image_resolution, self.image_resolution,
                                      camera_name='topview')
    else:
        example_goal = example_state_goal
    state_goal_shape = example_state_goal.shape
    goal_shape = example_goal.shape
    self.goal_space = Box(-1 * np.ones(goal_shape), np.ones(goal_shape))
    self.state_goal_space = Box(-1 * np.ones(state_goal_shape), np.ones(state_goal_shape))

    # Final Setup: standard goal-conditioned Dict observation space.
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.state_obs_space),
        ('state_desired_goal', self.state_goal_space),
        ('state_achieved_goal', self.state_goal_space),
    ])
    self.reset()
def __setstate__(self, d):
    """Restore serialized state, including the normalization statistics."""
    Serializable.__setstate__(self, d)
    for attr in ("_obs_means", "_obs_stds"):
        setattr(self, attr, d[attr])
def __getstate__(self):
    """Serialize state, explicitly including the normalization statistics."""
    state = Serializable.__getstate__(self)
    # Add these explicitly in case they were modified
    state.update(
        _obs_means=self._obs_means,
        _obs_stds=self._obs_stds,
    )
    return state
def __init__(self, env, horizon=500):
    """Wrap a gym env, converting its spaces to the local space types.

    Args:
        env: the gym environment to wrap.
        horizon: maximum episode length reported by this wrapper.
            Defaults to 500, the previously hard-coded value, so
            existing callers are unaffected.
    """
    Serializable.quick_init(self, locals())
    self.env = env
    self._observation_space = convert_gym_space(env.observation_space)
    self._action_space = convert_gym_space(env.action_space)
    self._horizon = horizon