Example #1
    def __init__(
            self,
            room=None,  # Specify either room or room type
            room_type='empty',  # Choose from ['empty', 'wall', 'rooms']
            potential_type="euclidean",  # Choose from ['none' (no shaping) ,'shaped' (shortest distance between COMs), 'euclidean' (euclidean distance between states)]
            base_reward='positive',  # Choose from ['positive'(0,1), 'negative' (-1,0)]
            shaped=False,
            speed=1,
            *args,
            **kwargs):

        Serializable.quick_init(self, locals())
        self.use_images = False
        room_defaults = dict(
            empty=room_world.rooms.Room('pm', 1.2, 1.2),
            wall=room_world.rooms.RoomWithWall('pm', 1.2, 1.2),
            rooms=room_world.rooms.FourRoom('pm', 1.2, 1.2),
        )
        if room is None:
            room = room_defaults[room_type]

        super().__init__(room=room,
                         potential_type=potential_type,
                         shaped=shaped,
                         base_reward=base_reward,
                         *args,
                         **kwargs)

        new_frame_skip = int(np.ceil(self.frame_skip * speed))
        self.modifier = self.frame_skip * speed / new_frame_skip
        self.frame_skip = new_frame_skip
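
The last three lines rescale the environment's frame skip by `speed` and record the leftover rounding ratio in `self.modifier` (how the modifier is consumed is not shown in this snippet). A quick standalone check of that arithmetic, with illustrative values:

import numpy as np

frame_skip, speed = 5, 1.5
new_frame_skip = int(np.ceil(frame_skip * speed))   # ceil(7.5) -> 8
modifier = frame_skip * speed / new_frame_skip      # 7.5 / 8 -> 0.9375
print(new_frame_skip, modifier)
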
Example #2
    def __init__(
            self,
            room=None,  # Specify either room or room type
            room_type='empty',  # Choose from ['empty', 'wall', 'rooms']
            potential_type="euclidean",  # Choose from ['none' (no shaping) ,'shaped' (shortest distance between COMs), 'euclidean' (euclidean distance between states)]
            shaped=False,
            *args,
            **kwargs):

        Serializable.quick_init(self, locals())
        self.use_images = False
        room_defaults = dict(
            empty=room_world.rooms.Room('pm', 1.2, 1.2),
            wall=room_world.rooms.RoomWithWall('pm', 1.2, 1.2),
            rooms=room_world.rooms.FourRoom('pm', 1.2, 1.2),
            long=room_world.rooms.Room('pm', 0.3, 7.2),
        )
        if room is None:
            room = room_defaults[room_type]

        super().__init__(room=room,
                         potential_type=potential_type,
                         shaped=shaped,
                         base_reward='com',
                         *args,
                         **kwargs)
Example #3
    def __init__(
            self,
            env,
            obs_keys=None,
            goal_keys=None,
            append_goal_to_obs=True,
    ):
        Serializable.quick_init(self, locals())
        self.env = env

        if obs_keys is None:
            obs_keys = ['observation']
        if goal_keys is None:
            goal_keys = ['desired_goal']
        if append_goal_to_obs:
            obs_keys += goal_keys

        assert isinstance(self.env.observation_space, Dict)
        for k in obs_keys:
            assert k in self.env.observation_space.spaces

        self.obs_keys = obs_keys
        self.goal_keys = goal_keys
        # TODO: handle nested dict
        self.observation_space = Box(
            np.hstack([
                self.env.observation_space.spaces[k].low
                for k in obs_keys
            ]),
            np.hstack([
                self.env.observation_space.spaces[k].high
                for k in obs_keys
            ]),
        )
        self.action_space = self.env.action_space
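
Below is a standalone sketch of how the flattened observation space above is assembled, assuming the `Box`/`Dict` classes come from `gym.spaces`; the keys and bounds here are illustrative:

import numpy as np
from gym.spaces import Box, Dict

# Illustrative Dict observation space of the kind the wrapper expects.
obs_space = Dict({
    'observation': Box(-np.ones(3), np.ones(3)),
    'desired_goal': Box(-np.ones(2), np.ones(2)),
})
obs_keys = ['observation', 'desired_goal']

# Concatenate per-key bounds into one flat Box, as in the constructor above.
flat_space = Box(
    np.hstack([obs_space.spaces[k].low for k in obs_keys]),
    np.hstack([obs_space.spaces[k].high for k in obs_keys]),
)
print(flat_space.shape)  # (5,)
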
Example #4
 def __init__(self, env, initial_configurations, domain='obj_pos'):
     assert not env.random_init, \
         "This wrapper requires the environment to be created with random_init=False."
     Serializable.quick_init(self, locals())
     super().__init__(env)
     self._domain = domain
     self._initial_configs = initial_configurations
     self._current_idx = 0
     self._n_initial_configs = len(self._initial_configs)
Example #5
 def __init__(self, **kwargs):
     Serializable.quick_init(self, locals())
     sawyer_reaching.SawyerReachXYZEnv.__init__(self, **kwargs)
     self.observation_space = Dict([
         ('observation', self.observation_space),
         ('desired_goal', self.goal_space),
         ('achieved_goal', self.goal_space),
         ('state_observation', self.observation_space),
         ('state_desired_goal', self.goal_space),
         ('state_achieved_goal', self.goal_space),
     ])
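
This six-key layout ('observation'/'desired_goal'/'achieved_goal' plus their 'state_*' counterparts) recurs in several of the examples below. A minimal standalone sketch of the pattern, assuming `gym.spaces` and using placeholder bounds:

import numpy as np
from gym.spaces import Box, Dict

obs_box = Box(-np.ones(4), np.ones(4))    # placeholder observation bounds
goal_box = Box(-np.ones(3), np.ones(3))   # placeholder goal bounds
observation_space = Dict([
    ('observation', obs_box),
    ('desired_goal', goal_box),
    ('achieved_goal', goal_box),
    ('state_observation', obs_box),
    ('state_desired_goal', goal_box),
    ('state_achieved_goal', goal_box),
])
print(list(observation_space.spaces.keys()))
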
Example #6
 def __init__(self, **kwargs):
     Serializable.quick_init(self, locals())
     super().__init__(**kwargs)
     self.observation_space = Dict([
         ('observation', self.observation_space),
         ('desired_goal', self.goal_space),
         ('achieved_goal', self.goal_space),
         ('state_observation', self.observation_space),
         ('state_desired_goal', self.goal_space),
         ('state_achieved_goal', self.goal_space),
     ])
     self.reset()
Example #7
    def __init__(self,
                 base='pm',
                 length=1.2,
                 width=1.2,
                 start=None,
                 target=None):
        Serializable.quick_init(self, locals())

        if start is None:
            start = np.array((-length / 6, -width / 4))

        if target is None:
            target = np.array((-width / 6, width / 4))

        super().__init__(base, length, width, start, target)
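
With the default 1.2 x 1.2 room dimensions, those fallback start/target expressions evaluate as follows (note that the target's first coordinate uses `width`, exactly as in the code above):

import numpy as np

length = width = 1.2
start = np.array((-length / 6, -width / 4))   # [-0.2, -0.3]
target = np.array((-width / 6, width / 4))    # [-0.2,  0.3]
print(start, target)
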
Example #8
 def __init__(self,
              door_open_epsilon=2,
              **kwargs
             ):
     self.door_open_epsilon = door_open_epsilon
     Serializable.quick_init(self, locals())
     sawyer_door.SawyerDoorEnv.__init__(self, **kwargs)
     self.observation_space = Dict([
         ('observation', self.observation_space),
         ('desired_goal', self.goal_space),
         ('achieved_goal', self.goal_space),
         ('state_observation', self.observation_space),
         ('state_desired_goal', self.goal_space),
         ('state_achieved_goal', self.goal_space),
     ])
     self.set_mode('eval')
Example #9
 def __init__(
     self,
     env,
     obs_means=None,
     obs_stds=None,
     obs_to_normalize_keys=['observation'],
 ):
     # self._wrapped_env needs to be set first because
     # Serializable.quick_init calls getattr on this class, and the
     # implementation of __getattr__ (see below) reads self._wrapped_env.
     # Without setting it first, that lookup would call __getattr__ again
     # (since the attribute does not exist yet) and loop forever.
     # Setting it here also keeps serialization of this env separate from
     # the wrapped_env rather than delegating it to the wrapped_env.
     self._wrapped_env = env
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)
     self._should_normalize = not (obs_means is None and obs_stds is None)
     self.obs_to_normalize_keys = obs_to_normalize_keys
     if self._should_normalize:
         if obs_means is None:
             # No means supplied: default to a zero shift for every key.
             obs_means = dict()
             for key in obs_to_normalize_keys:
                 obs_means[key] = np.zeros_like(
                     env.observation_space[key].low)
         else:
             # Convert the supplied per-key means to arrays.
             obs_means = {key: np.array(obs_means[key])
                          for key in obs_to_normalize_keys}
         if obs_stds is None:
             # No stds supplied: default to zeros for every key.
             obs_stds = dict()
             for key in obs_to_normalize_keys:
                 obs_stds[key] = np.zeros_like(
                     env.observation_space[key].low)
         else:
             # Convert the supplied per-key stds to arrays.
             obs_stds = {key: np.array(obs_stds[key])
                         for key in obs_to_normalize_keys}
     self._obs_means = obs_means
     self._obs_stds = obs_stds
     ub = np.ones(self._wrapped_env.action_space.shape)
     self.action_space = Box(-1 * ub, ub)
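
The constructor expects `obs_means` and `obs_stds` as dicts keyed by the entries of `obs_to_normalize_keys`. A hedged sketch of how such statistics might be assembled from a batch of dict observations (the batch here is synthetic, purely for illustration):

import numpy as np

obs_to_normalize_keys = ['observation']
batch = [{'observation': np.random.randn(5)} for _ in range(100)]  # synthetic data

obs_means = {key: np.mean([o[key] for o in batch], axis=0)
             for key in obs_to_normalize_keys}
obs_stds = {key: np.std([o[key] for o in batch], axis=0)
            for key in obs_to_normalize_keys}
# obs_means / obs_stds can then be passed to the wrapper above.
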
Example #10
    def __init__(self,
                 # Room
                 room,
                 # Start and Goal
                 start_config="all",
                 goal_config="all",
                 # Reward
                 potential_type='',
                 shaped=False,
                 base_reward='',
                 # State and Goal Representations
                 use_state_images=False,
                 use_goal_images=False,
                 image_resolution=64,
                 # Time Limits
                 max_path_length=None,
                 *args, **kwargs):

        # Initialize
        Serializable.quick_init(self, locals())
        MultitaskEnv.__init__(self)

        # Environment Configuration
        self._room = room
        model = self.room.get_mjcmodel()
        self.possible_positions = self.room.XY(n=50)

        with model.asfile() as f:
            MujocoEnv.__init__(self, f.name, frame_skip=self.FRAME_SKIP)

        # Initialization
        self.start_config = start_config
        self.baseline_start = self.room.get_start()
        self.start = np.zeros_like(self.baseline_start)

        self.goal_config = goal_config
        self.baseline_goal = self.room.get_target()
        self.goal = np.zeros_like(self.baseline_goal)

        # Time Limit
        self.curr_path_length = 0
        if max_path_length is None:
            self.max_path_length = self.MAX_PATH_LENGTH
        else:
            self.max_path_length = max_path_length

        # Reward Functions
        self.potential_type = potential_type
        self.shaped = shaped
        self.base_reward = base_reward

        # Action Space
        bounds = self.model.actuator_ctrlrange.copy()
        self.action_space = Box(low=bounds[:, 0], high=bounds[:, 1])

        self.use_state_images = use_state_images
        self.use_goal_images = use_goal_images
        self.image_resolution = image_resolution

        # Observation Space
        example_state_obs = self._get_env_obs()
        if self.use_state_images:
            example_obs = self.get_image(self.image_resolution, self.image_resolution, camera_name='topview')
        else:
            example_obs = example_state_obs
        
        state_obs_shape = example_obs.shape
        obs_shape = example_obs.shape
        self.obs_space = Box(-1 * np.ones(obs_shape), np.ones(obs_shape))
        self.state_obs_space = Box(-1 * np.ones(state_obs_shape), np.ones(state_obs_shape))

        # Goal Space
        example_state_goal = self._get_env_achieved_goal(example_state_obs)
        if self.use_goal_images:
            example_goal = self.get_image(self.image_resolution, self.image_resolution, camera_name='topview')
        else:
            example_goal = example_state_goal

        state_goal_shape = example_state_goal.shape
        goal_shape = example_goal.shape
        self.goal_space = Box(-1 * np.ones(goal_shape), np.ones(goal_shape))
        self.state_goal_space = Box(-1 * np.ones(state_goal_shape), np.ones(state_goal_shape))

        # Final Setup
        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.goal_space),
            ('state_observation', self.state_obs_space),
            ('state_desired_goal', self.state_goal_space),
            ('state_achieved_goal', self.state_goal_space),
        ])

        self.reset()
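
The action space above is built directly from the MuJoCo actuator control ranges. A self-contained sketch of that step, with an illustrative `bounds` array standing in for `model.actuator_ctrlrange`:

import numpy as np
from gym.spaces import Box

# One row per actuator; columns are (low, high), like model.actuator_ctrlrange.
bounds = np.array([[-1.0, 1.0],
                   [-0.5, 0.5]])
action_space = Box(low=bounds[:, 0], high=bounds[:, 1])
print(action_space.low, action_space.high)
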
Example #11
 def __setstate__(self, d):
     Serializable.__setstate__(self, d)
     self._obs_means = d["_obs_means"]
     self._obs_stds = d["_obs_stds"]
Example #12
 def __getstate__(self):
     d = Serializable.__getstate__(self)
     # Add these explicitly in case they were modified
     d["_obs_means"] = self._obs_means
     d["_obs_stds"] = self._obs_stds
     return d
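
Examples #11 and #12 implement the two halves of an explicit pickle round trip for the normalization statistics. A self-contained toy version of the same pattern, without the Serializable base class (the class name and fields here are illustrative, not from the source):

import pickle


class StatefulWrapper:
    """Toy class mirroring the __getstate__/__setstate__ pattern above."""

    def __init__(self, obs_means, obs_stds):
        self._obs_means = obs_means
        self._obs_stds = obs_stds

    def __getstate__(self):
        # Carry the statistics explicitly in case they were modified.
        return {"_obs_means": self._obs_means, "_obs_stds": self._obs_stds}

    def __setstate__(self, d):
        self._obs_means = d["_obs_means"]
        self._obs_stds = d["_obs_stds"]


w = StatefulWrapper({"observation": 0.0}, {"observation": 1.0})
restored = pickle.loads(pickle.dumps(w))
assert restored._obs_means == {"observation": 0.0}
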
Example #13
 def __init__(self, env):
     Serializable.quick_init(self, locals())
     self.env = env
     self._observation_space = convert_gym_space(env.observation_space)
     self._action_space = convert_gym_space(env.action_space)
     self._horizon = 500