    def __init__(self, *args, **kwargs):
        self.random_start = kwargs.get("random_start", True)
        # Load the double-pendulum model from the package's assets directory.
        MujocoEnv.__init__(
            self,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets",
                         "inverted_double_pendulum.xml"))
        Serializable.quick_init(self, locals())
    def __init__(self, env_spec, max_replay_buffer_size):
        Serializable.quick_init(self, locals())
        super(SimpleReplayBuffer, self).__init__(env_spec)

        max_replay_buffer_size = int(max_replay_buffer_size)
        # TODO: add support for changing buffer datatypes and attributes

        self._env_spec = env_spec
        self._observation_dim = env_spec.observation_space.flat_dim
        self._action_dim = env_spec.action_space.flat_dim
        self._max_buffer_size = max_replay_buffer_size
        self._observations = np.zeros(
            (max_replay_buffer_size, self._observation_dim), dtype='float32')
        # It's a bit memory inefficient to save the observations twice,
        # but it makes the code *much* easier since you no longer have to
        # worry about termination conditions.
        # TODO: add logic for termination condition to save memory
        # TODO: at least convert skill to int or use sparse matrix to save mem
        self._next_obs = np.zeros(
            (max_replay_buffer_size, self._observation_dim), dtype='float32')
        self._actions = np.zeros((max_replay_buffer_size, self._action_dim),
                                 dtype='float32')
        self._rewards = np.zeros(max_replay_buffer_size, dtype='float32')
        # self._dones[i] = 1 if a terminal signal was received at time i
        self._dones = np.zeros(max_replay_buffer_size, dtype='uint8')
        self._top = 0
        self._size = 0
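A minimal sketch of the companion add_sample method, assuming the usual ring-buffer convention implied by _top and _size above (this method is not shown in the snippet; the field names are taken from the constructor):

    def add_sample(self, observation, action, reward, done, next_observation):
        # Write the transition at the current insertion index.
        self._observations[self._top] = observation
        self._actions[self._top] = action
        self._rewards[self._top] = reward
        self._dones[self._top] = done
        self._next_obs[self._top] = next_observation
        # Advance cyclically; once full, the oldest samples are overwritten.
        self._top = (self._top + 1) % self._max_buffer_size
        self._size = min(self._size + 1, self._max_buffer_size)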
Example 3
    def __init__(self, observation_space, action_space):
        """
        :type observation_space: Space
        :type action_space: Space
        """
        Serializable.quick_init(self, locals())
        self._observation_space = observation_space
        self._action_space = action_space
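A sketch of how the stored spaces are commonly exposed, assuming read-only property accessors (not shown in the snippet):

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def action_space(self):
        return self._action_space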
Example 4
    def __init__(
            self,
            alive_coeff=1,
            ctrl_cost_coeff=0.01,
            *args, **kwargs):
        self.alive_coeff = alive_coeff
        self.ctrl_cost_coeff = ctrl_cost_coeff
        MujocoEnv.__init__(
            self,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets",
                         "hopper.xml"))
        Serializable.quick_init(self, locals())
Example 5
    def __init__(self,
                 env_name,
                 record_video=False,
                 video_schedule=None,
                 log_dir=None,
                 record_log=False,
                 force_reset=True,
                 seed=1):
        # if log_dir is None:
        #     if logger.get_snapshot_dir() is None:
        #         logger.log("Warning: skipping Gym environment monitoring since snapshot_dir not configured.")
        #     else:
        #         log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
        Serializable.quick_init(self, locals())

        env = gym.envs.make(env_name)
        env.seed(seed)

        # HACK: Gets rid of the TimeLimit wrapper that sets 'done = True' when
        # the time limit specified for each environment has been passed and
        # therefore the environment is not Markovian (terminal condition depends
        # on time rather than state).
        env = env.env

        self.env = env
        self.env_id = env.spec.id

        # Recording video requires logging to be enabled as well.
        assert not (not record_log and record_video)

        if log_dir is None or record_log is False:
            self.monitoring = False
        else:
            if not record_video:
                video_schedule = NoVideoSchedule()
            else:
                if video_schedule is None:
                    video_schedule = CappedCubicVideoSchedule()
            self.env = gym.wrappers.Monitor(self.env,
                                            log_dir,
                                            video_callable=video_schedule,
                                            force=True)
            self.monitoring = True

        self._observation_space = convert_gym_space(env.observation_space)
        # logger.log("observation space: {}".format(self._observation_space))
        self._action_space = convert_gym_space(env.action_space)
        # logger.log("action space: {}".format(self._action_space))
        self._horizon = env.spec.tags[
            'wrapper_config.TimeLimit.max_episode_steps']
        self._log_dir = log_dir
        self._force_reset = force_reset
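A minimal usage sketch, assuming this wrapper is exposed as GymEnv and that "HalfCheetah-v1" is a registered Gym environment id (both are assumptions, not shown in the snippet):

    env = GymEnv("HalfCheetah-v1", record_log=False, record_video=False, seed=0)
    observation = env.env.reset()  # reset the unwrapped gym environment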
    def __init__(self,
                 vel_deviation_cost_coeff=1e-2,
                 alive_bonus=0.2,
                 ctrl_cost_coeff=1e-3,
                 impact_cost_coeff=1e-5,
                 *args,
                 **kwargs):
        self.vel_deviation_cost_coeff = vel_deviation_cost_coeff
        self.alive_bonus = alive_bonus
        self.ctrl_cost_coeff = ctrl_cost_coeff
        self.impact_cost_coeff = impact_cost_coeff
        MujocoEnv.__init__(
            self,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets",
                         "simple_humanoid.xml"))
        Serializable.quick_init(self, locals())
Example 7
    def __init__(self, env, reward_fn, num_skills, skill):
        Serializable.quick_init(self, locals())

        self._wrapped_env = env
        self.reward_function = reward_fn

        self.num_skills = num_skills
        self.z = skill
        # Augment the observation space with a one-hot skill indicator.
        obs_space = self.observation_space
        low = np.hstack([obs_space.low, np.full(num_skills, 0)])
        high = np.hstack([obs_space.high, np.full(num_skills, 1)])
        self.observation_space = spaces.Box(low=low, high=high)
        # TODO: verify the line below is doing something useful
        self.action_space = self.action_space
        print("Verifying action space: ", self.action_space)
        self.spec = EnvSpec(self.observation_space, self.action_space)
        self.reset(state=None, skill=self.z)
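An illustrative sketch (not in the snippet) of the observation augmentation this Box implies: observations from the wrapped environment would be concatenated with a one-hot encoding of the active skill. The helper name _augment_observation is hypothetical:

    def _augment_observation(self, obs):
        # One-hot encode the active skill and append it to the observation,
        # matching the widened Box bounds built in __init__.
        one_hot = np.zeros(self.num_skills)
        one_hot[self.z] = 1.0
        return np.hstack([obs, one_hot])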
    def __init__(
        self,
        env,
        scale_reward=1.,
        normalize_obs=False,
        normalize_reward=False,
        obs_alpha=0.001,
        reward_alpha=0.001,
        normalization_scale=1.,
    ):
        Serializable.quick_init(self, locals())

        self._scale_reward = scale_reward
        self._wrapped_env = env

        self._normalize_obs = normalize_obs
        self._normalize_reward = normalize_reward
        self._obs_alpha = obs_alpha
        self._obs_mean = np.zeros(self.observation_space.shape)
        self._obs_var = np.ones(self.observation_space.shape)
        self._reward_alpha = reward_alpha
        self._reward_mean = 0.
        self._reward_var = 1.
        self._normalization_scale = normalization_scale
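A sketch of how these running statistics are typically maintained, assuming an exponential moving average keyed by _obs_alpha (the method name and body are assumptions, not shown in the snippet):

    def _update_obs_estimate(self, obs):
        # Exponentially weighted estimates of observation mean and variance.
        self._obs_mean = ((1 - self._obs_alpha) * self._obs_mean
                          + self._obs_alpha * obs)
        self._obs_var = ((1 - self._obs_alpha) * self._obs_var
                         + self._obs_alpha * np.square(obs - self._obs_mean))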
Example 9
    def __init__(self, *args, **kwargs):
        MujocoEnv.__init__(
            self,
            os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets",
                         "half_cheetah.xml"))
        Serializable.__init__(self, *args, **kwargs)
Example 10
    def __init__(self, *args, **kwargs):
        super(AntEnv, self).__init__(*args, **kwargs)
        Serializable.__init__(self, *args, **kwargs)

    def __setstate__(self, d):
        Serializable.__setstate__(self, d)
        self._obs_mean = d["_obs_mean"]
        self._obs_var = d["_obs_var"]

    def __getstate__(self):
        d = Serializable.__getstate__(self)
        d["_obs_mean"] = self._obs_mean
        d["_obs_var"] = self._obs_var
        return d
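A round-trip sketch of what these hooks buy you, assuming env is an instance of the class above and numpy is imported as np (illustrative, not from the snippet):

    import pickle

    restored = pickle.loads(pickle.dumps(env))
    # The running statistics survive serialization alongside the
    # constructor arguments captured by Serializable.
    assert np.allclose(restored._obs_mean, env._obs_mean)
    assert np.allclose(restored._obs_var, env._obs_var)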