Esempio n. 1
0
 def __init__(
     self,
     max_speed=0.05,
     max_distance=1,
     use_low_gear_ratio=True,
     speed_weight=0.9,
     done_threshold=0.005,
     goal_dim_weights=None,
 ):
     """Build the environment and install an initial four-entry goal.

     Args:
         max_speed: Stored on the instance; fills the last two entries of
             the initial goal.
         max_distance: Stored on the instance; fills the first two entries
             of the initial goal.
         use_low_gear_ratio: Forwarded unchanged to the parent constructor.
         speed_weight: Must end up within ``[0, 1]``. When ``None`` it is
             taken from ``self.goal_dim_weights[2]`` once construction has
             finished.
         done_threshold: Stored on the instance as-is.
         goal_dim_weights: Forwarded to ``MultitaskEnv.__init__``.

     Raises:
         AssertionError: If the final ``speed_weight`` lies outside
             ``[0, 1]``, or if ``speed_weight`` is ``None`` and the goal
             weights are not pairwise equal (index 0 vs 1, index 2 vs 3).
     """
     Serializable.quick_init(self, locals())
     derive_speed_weight = speed_weight is None

     self.max_distance = max_distance
     self.max_speed = max_speed
     # TODO: fix this hack
     # A placeholder of 0.9 stands in while the parent constructors run
     # ("just for init to work"); the real value is filled in below.
     self.speed_weight = 0.9 if derive_speed_weight else speed_weight
     self.done_threshold = done_threshold

     # Flag is raised for the duration of parent construction and the
     # first set_goal call, then lowered again.
     self.initializing = True
     MultitaskEnv.__init__(self, goal_dim_weights=goal_dim_weights)
     super().__init__(use_low_gear_ratio=use_low_gear_ratio)
     initial_goal = np.array([
         self.max_distance,
         self.max_distance,
         self.max_speed,
         self.max_speed,
     ])
     self.set_goal(initial_goal)
     self.initializing = False

     if derive_speed_weight:
         weights = self.goal_dim_weights
         assert (weights[0] == weights[1]) and (weights[2] == weights[3])
         self.speed_weight = weights[2]
     assert 0 <= self.speed_weight <= 1
Esempio n. 2
0
    def __init__(
        self,
        env: MultitaskEnv,
        give_goal_difference=False,
    ):
        """Wrap ``env`` and widen its observation space by ``goal_dim`` entries.

        The new observation bounds are the wrapped bounds followed by
        ``goal_dim`` extra entries. The extra lower entries all equal the
        smallest wrapped lower bound, and the extra upper entries all equal
        the largest wrapped upper bound.

        Args:
            env: Environment to wrap. This method reads its
                ``observation_space`` and ``goal_dim``.
            give_goal_difference: Stored on the instance as-is; not used in
                this constructor.
        """
        # self._wrapped_env needs to be called first because
        # Serializable.quick_init calls getattr, on this class. And the
        # implementation of getattr (see below) calls self._wrapped_env.
        # Without setting this first, the call to self._wrapped_env would call
        # getattr again (since it's not set yet) and therefore loop forever.
        self._wrapped_env = env
        # Or else serialization gets delegated to the wrapped_env. Serialize
        # this env separately from the wrapped_env.
        self._serializable_initialized = False
        self._wrapped_obs_dim = env.observation_space.low.size
        self.give_goal_difference = give_goal_difference
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)

        wrapped_low = self.observation_space.low
        low = np.hstack(
            (wrapped_low,
             min(wrapped_low) * np.ones(self._wrapped_env.goal_dim)))
        # Upper bounds must come from `.high`; reading `.low` here made the
        # resulting Box's upper bound a copy of its lower bound.
        wrapped_high = self.observation_space.high
        high = np.hstack(
            (wrapped_high,
             max(wrapped_high) * np.ones(self._wrapped_env.goal_dim)))
        self.observation_space = Box(low, high)
Esempio n. 3
0
 def __init__(
     self,
     env,
     reward_scale=1.,
     obs_mean=None,
     obs_std=None,
 ):
     """Wrap ``env``, storing scaling parameters and a [-1, 1] action box.

     Args:
         env: Environment to wrap. Its ``observation_space.low`` is used as
             the shape template for a missing mean or std, and its
             ``action_space.shape`` sizes the new action space.
         reward_scale: Stored as ``_reward_scale``.
         obs_mean: Optional observation mean. If only ``obs_std`` is given,
             this defaults to zeros.
         obs_std: Optional observation std. If only ``obs_mean`` is given,
             this defaults to ones.

     Normalization is flagged on (``_should_normalize``) when at least one
     of ``obs_mean`` / ``obs_std`` is supplied. When neither is, both are
     stored as ``None``.
     """
     # self._wrapped_env has to be assigned before anything else:
     # Serializable.quick_init calls getattr on this class, and this class's
     # getattr reads self._wrapped_env. If the attribute were still missing,
     # that read would re-enter getattr and recurse forever.
     self._wrapped_env = env
     # Keep serialization of this wrapper separate from the wrapped env;
     # otherwise it would be delegated to the wrapped env.
     self._serializable_initialized = False
     Serializable.quick_init(self, locals())
     ProxyEnv.__init__(self, env)

     self._should_normalize = obs_mean is not None or obs_std is not None
     if self._should_normalize:
         obs_mean = (
             np.zeros_like(env.observation_space.low)
             if obs_mean is None
             else np.array(obs_mean)
         )
         obs_std = (
             np.ones_like(env.observation_space.low)
             if obs_std is None
             else np.array(obs_std)
         )

     self._reward_scale = reward_scale
     self._obs_mean = obs_mean
     self._obs_std = obs_std

     # Expose a symmetric unit box with the wrapped action space's shape.
     unit_bound = np.ones(self._wrapped_env.action_space.shape)
     self.action_space = Box(-1 * unit_bound, unit_bound)
Esempio n. 4
0
 def __init__(
     self,
     action_space,
     mu=0,
     theta=0.15,
     max_sigma=0.3,
     min_sigma=None,
     decay_period=100000,
 ):
     """Store the noise parameters and initialise the internal state.

     Args:
         action_space: Space whose ``low`` / ``high`` arrays are stored and
             whose ``low.shape`` determines the state dimension.
         mu: Stored as ``self.mu``; the initial state is ``mu`` in every
             entry.
         theta: Stored as ``self.theta``.
         max_sigma: Stored as ``_max_sigma`` and used as the starting
             ``self.sigma``.
         min_sigma: Stored as ``_min_sigma``. Defaults to ``max_sigma`` when
             ``None``.
         decay_period: Stored as ``_decay_period``.
     """
     Serializable.quick_init(self, locals())
     # Default is resolved once; the original repeated this check further
     # down, where it could never trigger.
     if min_sigma is None:
         min_sigma = max_sigma
     self.mu = mu
     self.theta = theta
     self.sigma = max_sigma
     self._max_sigma = max_sigma
     self._min_sigma = min_sigma
     self._decay_period = decay_period
     self.dim = np.prod(action_space.low.shape)
     self.low = action_space.low
     self.high = action_space.high
     self.state = np.ones(self.dim) * self.mu
     self.reset()
Esempio n. 5
0
 def __init__(self, action_space, max_sigma=1.0, min_sigma=None,
              decay_period=1000000):
     """Store the sigma schedule parameters for a 1-D action space.

     Args:
         action_space: Space with a one-dimensional ``shape``; stored as
             ``_action_space``.
         max_sigma: Stored as ``_max_sigma``.
         min_sigma: Stored as ``_min_sigma``. Falls back to ``max_sigma``
             when ``None``.
         decay_period: Stored as ``_decay_period``.

     Raises:
         AssertionError: If ``action_space.shape`` is not one-dimensional.
     """
     assert len(action_space.shape) == 1
     Serializable.quick_init(self, locals())
     self._action_space = action_space
     self._decay_period = decay_period
     self._max_sigma = max_sigma
     self._min_sigma = max_sigma if min_sigma is None else min_sigma
Esempio n. 6
0
 def __init__(
     self,
     min_distance=0,
     max_distance=2,
     use_low_gear_ratio=True,
 ):
     """Build the environment and set the initial goal to the max distance.

     Args:
         min_distance: Stored on the instance as-is.
         max_distance: Stored on the instance; both entries of the initial
             goal are set to this value.
         use_low_gear_ratio: Forwarded unchanged to the parent constructor.
     """
     Serializable.quick_init(self, locals())
     self.min_distance = min_distance
     self.max_distance = max_distance
     MultitaskEnv.__init__(self)
     super().__init__(use_low_gear_ratio=use_low_gear_ratio)
     initial_goal = np.array([self.max_distance] * 2)
     self.set_goal(initial_goal)
Esempio n. 7
0
 def __init__(self, distance_metric_order=None, goal_dim_weights=None):
     """Load the 7-DoF reacher model and declare its observation bounds.

     Args:
         distance_metric_order: Forwarded to ``MultitaskEnv.__init__``.
         goal_dim_weights: Forwarded to ``MultitaskEnv.__init__``.

     The observation box has 17 entries: 7 joint values, 7 velocities and
     the 3 end-effector xyz coordinates, in that order.
     """
     self._desired_xyz = np.zeros(3)
     Serializable.quick_init(self, locals())
     MultitaskEnv.__init__(
         self,
         distance_metric_order=distance_metric_order,
         goal_dim_weights=goal_dim_weights,
     )
     # Second positional argument (5) is presumably the frame skip --
     # confirm against mujoco_env.MujocoEnv.
     mujoco_env.MujocoEnv.__init__(
         self,
         get_asset_xml('reacher_7dof.xml'),
         5,
     )

     # Per-group bounds, concatenated below in joint / velocity / EE order.
     joint_low = [-2.28, -0.52, -1.4, -2.32, -1.5, -1.094, -1.5]
     joint_high = [1.71, 1.39, 1.7, 0, 1.5, 0, 1.5]
     velocity_low = [-3] * 7
     velocity_high = [3] * 7
     ee_xyz_low = [-0.75, -1.25, -0.2]
     ee_xyz_high = [0.75, 0.25, 0.6]

     self.observation_space = Box(
         np.array(joint_low + velocity_low + ee_xyz_low),
         np.array(joint_high + velocity_high + ee_xyz_high),
     )
Esempio n. 8
0
 def __getstate__(self):
     """Return the serializable state plus the current normalization values.

     ``_obs_mean``, ``_obs_std`` and ``_reward_scale`` are written into the
     state explicitly in case they were modified.
     """
     state = Serializable.__getstate__(self)
     for attr in ("_obs_mean", "_obs_std", "_reward_scale"):
         state[attr] = getattr(self, attr)
     return state
Esempio n. 9
0
 def __setstate__(self, d):
     """Restore the instance from ``d``, then reapply the stored scalers.

     Args:
         d: State mapping; must contain ``"_obs_mean"``, ``"_obs_std"`` and
             ``"_reward_scale"`` in addition to whatever
             ``Serializable.__setstate__`` consumes.
     """
     Serializable.__setstate__(self, d)
     for attr in ("_obs_mean", "_obs_std", "_reward_scale"):
         setattr(self, attr, d[attr])
Esempio n. 10
0
 def __init__(self, wrapped_env):
     """Wrap ``wrapped_env`` and mirror its action and observation spaces.

     Args:
         wrapped_env: Environment to delegate to; stored as
             ``_wrapped_env``.
     """
     Serializable.quick_init(self, locals())
     self._wrapped_env = wrapped_env
     # Copy both space attributes from the wrapped env onto this proxy.
     for space_name in ("action_space", "observation_space"):
         setattr(self, space_name, getattr(self._wrapped_env, space_name))
Esempio n. 11
0
 def __init__(self):
     """Sample a target x-velocity, build the env, and set the initial goal.

     ``target_x_vel`` is drawn uniformly from ``[-MAX_SPEED, MAX_SPEED)``
     before the parent constructors run. The initial goal is the
     one-element array ``[5]``.
     """
     Serializable.quick_init(self, locals())
     self.target_x_vel = np.random.uniform(low=-MAX_SPEED, high=MAX_SPEED)
     super().__init__()
     MultitaskEnv.__init__(self)
     initial_goal = np.array([5])
     self.set_goal(initial_goal)
Esempio n. 12
0
 def __setstate__(self, state):
     """Restore this instance by delegating to ``Serializable.__setstate__``.

     Args:
         state: State object passed through unchanged.

     Returns:
         Whatever ``Serializable.__setstate__`` returns.
     """
     restored = Serializable.__setstate__(self, state)
     return restored
Esempio n. 13
0
 def __getstate__(self):
     """Return this instance's state as produced by ``Serializable``."""
     state = Serializable.__getstate__(self)
     return state
Esempio n. 14
0
 def init_serialization(self, locals):
     """Forward ``locals`` to ``Serializable.quick_init`` for this instance.

     Args:
         locals: Object handed straight to ``Serializable.quick_init``;
             presumably the caller's ``locals()`` mapping -- confirm against
             callers. Note that this parameter name shadows the ``locals``
             builtin inside this method.
     """
     Serializable.quick_init(self, locals)
Esempio n. 15
0
 def __init__(self, action_space, prob_random_action=0.1):
     """Store the discrete action space and the random-action probability.

     Args:
         action_space: Must be a ``Discrete`` space; stored as
             ``self.action_space``.
         prob_random_action: Stored as ``self.prob_random_action``.

     Raises:
         AssertionError: If ``action_space`` is not a ``Discrete`` instance.
     """
     # quick_init is called once; the original invoked it a second time
     # with the same locals right after the type check.
     Serializable.quick_init(self, locals())
     assert isinstance(action_space, Discrete)
     self.prob_random_action = prob_random_action
     self.action_space = action_space