Example #1
    def __init__(self, *args, **kwargs):
        # Build the path to the MuJoCo model matching the requested number
        # of legs, e.g. models/silvia6.xml next to this file.
        xml_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'models', 'silvia{}.xml'.format(kwargs['legs']))
        MujocoEnv.__init__(self, xml_path, 5)
        utils.EzPickle.__init__(self)
        Serializable.__init__(self, *args, **kwargs)
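All of these examples lean on the same rllab-style Serializable mixin: it remembers the constructor arguments so the object can be rebuilt on unpickling, instead of trying to pickle unpicklable attributes such as MuJoCo handles or TF graphs. Below is a minimal sketch of that idea, not the actual rllab implementation (which also handles *args/**kwargs and nested serializables):

import inspect

class Serializable(object):
    # Sketch only: stores constructor args so pickling can rebuild the object.
    def __init__(self, *args, **kwargs):
        self.__args = args
        self.__kwargs = kwargs

    def quick_init(self, locals_):
        # Recover the constructor arguments from locals() by inspecting the
        # subclass's __init__ signature (dropping 'self').
        arg_names = inspect.getfullargspec(type(self).__init__).args[1:]
        self.__args = tuple(locals_[name] for name in arg_names)
        self.__kwargs = {}

    def __getstate__(self):
        return {"__args": self.__args, "__kwargs": self.__kwargs}

    def __setstate__(self, d):
        # Rebuild through the constructor, then adopt its attributes.
        obj = type(self)(*d["__args"], **d["__kwargs"])
        self.__dict__.update(obj.__dict__)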
Example #2
    def __setstate__(self, d):
        Serializable.__setstate__(self, d)
        global load_params
        if load_params:
            # Re-initialize the TF variables, then overwrite them with the
            # parameter values that were pickled in __getstate__.
            tf.get_default_session().run(
                tf.variables_initializer(self.get_params()))
            self.set_param_values(d["params"])
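The load_params global seen here and in Example #12 is, in rllab, a module-level flag that lets callers skip parameter restoration when only the network structure is needed. A sketch of the flag and the context manager that toggles it, assuming the rllab convention:

from contextlib import contextmanager

load_params = True

@contextmanager
def suppress_params_loading():
    # Temporarily disable copying parameter values during unpickling.
    global load_params
    load_params = False
    yield
    load_params = True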
Example #3
    def __init__(self, env_spec, obs_pl, action, scope_name=None):
        Serializable.quick_init(self, locals())

        self._obs_pl = obs_pl
        self._action = action
        # Default to the variable scope active at construction time.
        self._scope_name = scope_name or tf.get_variable_scope().name
        super(NNPolicy, self).__init__(env_spec)
Example #4
    def __init__(self, observation_space, action_space):
        """
        :type observation_space: Space
        :type action_space: Space
        """
        Serializable.quick_init(self, locals())
        self._observation_space = observation_space
        self._action_space = action_space
Example #5
    def __init__(self, inputs, name, hidden_layer_sizes):
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())

        self._name = name
        self._inputs = inputs
        self._layer_sizes = list(hidden_layer_sizes) + [1]

        self._output = self._output_for(self._inputs)
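Example #5 defers the actual network construction to _output_for, which Examples #7 and #8 also rely on. A plausible implementation is a plain MLP over the concatenated inputs; this sketch uses TF1's tf.layers and is an illustration, not the exact upstream code:

    def _output_for(self, inputs, reuse=False):
        # Sketch: MLP with ReLU hidden layers; the final entry of
        # self._layer_sizes (1) produces a scalar value per input row.
        with tf.variable_scope(self._name, reuse=reuse):
            out = tf.concat(inputs, axis=-1)
            for i, size in enumerate(self._layer_sizes[:-1]):
                out = tf.layers.dense(out, size, activation=tf.nn.relu,
                                      name='layer_{}'.format(i))
            out = tf.layers.dense(out, self._layer_sizes[-1],
                                  activation=None, name='output')
        return tf.reshape(out, [-1])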
Example #6
    def __init__(self,
                 env_name,
                 record_video=False,
                 video_schedule=None,
                 log_dir=None,
                 record_log=False,
                 force_reset=True):
        if log_dir is None:
            if logger.get_snapshot_dir() is None:
                logger.log(
                    "Warning: skipping Gym environment monitoring since snapshot_dir not configured."
                )
            else:
                log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
        Serializable.quick_init(self, locals())

        env = gym.envs.make(env_name)

        # HACK: Gets rid of the TimeLimit wrapper that sets 'done = True' when
        # the time limit specified for each environment has been passed and
        # therefore the environment is not Markovian (terminal condition depends
        # on time rather than state).
        env = env.env

        self.env = env
        self.env_id = env.spec.id

        # Recording video only makes sense when monitor logging is enabled.
        assert record_log or not record_video

        if log_dir is None or record_log is False:
            self.monitoring = False
        else:
            if not record_video:
                video_schedule = NoVideoSchedule()
            else:
                if video_schedule is None:
                    video_schedule = CappedCubicVideoSchedule()
            self.env = gym.wrappers.Monitor(self.env,
                                            log_dir,
                                            video_callable=video_schedule,
                                            force=True)
            self.monitoring = True

        self._observation_space = convert_gym_space(env.observation_space)
        logger.log("observation space: {}".format(self._observation_space))
        self._action_space = convert_gym_space(env.action_space)
        logger.log("action space: {}".format(self._action_space))
        self._horizon = env.spec.tags[
            'wrapper_config.TimeLimit.max_episode_steps']
        self._log_dir = log_dir
        self._force_reset = force_reset
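NoVideoSchedule and CappedCubicVideoSchedule above are small callables that gym's Monitor queries once per episode. Sketches matching the usual rllab definitions (the cubic schedule records episodes 0, 1, 8, 27, ... and then every 1000th):

class NoVideoSchedule(object):
    def __call__(self, count):
        return False

class CappedCubicVideoSchedule(object):
    def __call__(self, count):
        # Record at perfect cubes until 1000 episodes, then every 1000th.
        if count < 1000:
            return int(round(count ** (1. / 3))) ** 3 == count
        return count % 1000 == 0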
Example #7
    def __init__(self,
                 env_spec,
                 hidden_layer_sizes=(100, 100),
                 name='value_function'):
        Serializable.quick_init(self, locals())

        self._Do = env_spec.observation_space.flat_dim
        self._observations_ph = tf.placeholder(
            tf.float32, shape=[None, self._Do], name='observations')

        super(NNVFunction, self).__init__(
            inputs=(self._observations_ph, ),
            name=name,
            hidden_layer_sizes=hidden_layer_sizes)
Example #8
    def __init__(self, env_spec, q_functions):
        Serializable.quick_init(self, locals())

        self.q_functions = q_functions

        self._Da = env_spec.action_space.flat_dim
        self._Do = env_spec.observation_space.flat_dim

        self._observations_ph = tf.placeholder(
            tf.float32, shape=[None, self._Do], name='observations')
        self._actions_ph = tf.placeholder(
            tf.float32, shape=[None, self._Da], name='actions')

        self._output = self.output_for(
            self._observations_ph, self._actions_ph, reuse=True)
Example #9
    def __init__(
            self,
            env,
            scale_reward=1.,
            normalize_obs=False,
            normalize_reward=False,
            obs_alpha=0.001,
            reward_alpha=0.001,
    ):
        Serializable.quick_init(self, locals())
        ProxyEnv.__init__(self, env)
        self._scale_reward = scale_reward
        self._normalize_obs = normalize_obs
        self._normalize_reward = normalize_reward
        # Running estimates of observation/reward statistics, maintained as
        # exponential moving averages with the given alpha coefficients.
        self._obs_alpha = obs_alpha
        self._obs_mean = np.zeros(np.prod(env.observation_space.low.shape))
        self._obs_var = np.ones(np.prod(env.observation_space.low.shape))
        self._reward_alpha = reward_alpha
        self._reward_mean = 0.
        self._reward_var = 1.
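The statistics initialized above are typically updated each step and then used to whiten observations. A sketch of the update/apply pair, following the usual rllab normalize wrapper (exact details may differ upstream):

    def _update_obs_estimate(self, obs):
        flat_obs = self._wrapped_env.observation_space.flatten(obs)
        self._obs_mean = ((1 - self._obs_alpha) * self._obs_mean
                          + self._obs_alpha * flat_obs)
        self._obs_var = ((1 - self._obs_alpha) * self._obs_var
                         + self._obs_alpha * np.square(flat_obs - self._obs_mean))

    def _apply_normalize_obs(self, obs):
        self._update_obs_estimate(obs)
        return (obs - self._obs_mean) / (np.sqrt(self._obs_var) + 1e-8)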
Example #10
    def __init__(self,
                 env_spec,
                 hidden_layer_sizes,
                 squash=True,
                 name='policy'):
        Serializable.quick_init(self, locals())

        self._action_dim = env_spec.action_flat_dim
        self._observation_dim = env_spec.observation_flat_dim
        self._layer_sizes = list(hidden_layer_sizes) + [self._action_dim]
        self._squash = squash
        self._name = name

        self._observation_ph = tf.placeholder(
            tf.float32,
            shape=[None, self._observation_dim],
            name='observation')

        self._actions = self.actions_for(self._observation_ph)

        super(StochasticNNPolicy,
              self).__init__(env_spec, self._observation_ph, self._actions,
                             self._name)
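actions_for, used above, is where the stochastic policy samples: a common construction feeds the observation together with Gaussian noise through an MLP and squashes the result with tanh when squash=True. A hypothetical TF1 sketch (layer naming and reuse handling are assumptions, not the upstream code):

    def actions_for(self, observations, reuse=tf.AUTO_REUSE):
        n = tf.shape(observations)[0]
        latents = tf.random_normal((n, self._action_dim))
        with tf.variable_scope(self._name, reuse=reuse):
            out = tf.concat([observations, latents], axis=-1)
            for i, size in enumerate(self._layer_sizes[:-1]):
                out = tf.layers.dense(out, size, activation=tf.nn.relu,
                                      name='layer_{}'.format(i))
            raw_actions = tf.layers.dense(out, self._layer_sizes[-1],
                                          activation=None, name='raw_actions')
        # Squash into (-1, 1) so actions respect bounded action spaces.
        return tf.tanh(raw_actions) if self._squash else raw_actions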
Example #11
    def __init__(self, env_spec, max_replay_buffer_size):
        super(SimpleReplayBuffer, self).__init__()
        Serializable.quick_init(self, locals())

        max_replay_buffer_size = int(max_replay_buffer_size)

        self._env_spec = env_spec
        self._observation_dim = env_spec.observation_flat_dim
        self._action_dim = env_spec.action_flat_dim
        self._max_buffer_size = max_replay_buffer_size
        self._observations = np.zeros((max_replay_buffer_size,
                                       self._observation_dim))
        # It's a bit memory inefficient to save the observations twice,
        # but it makes the code *much* easier since you no longer have to
        # worry about termination conditions.
        self._next_obs = np.zeros((max_replay_buffer_size,
                                   self._observation_dim))
        self._actions = np.zeros((max_replay_buffer_size, self._action_dim))
        self._rewards = np.zeros(max_replay_buffer_size)
        # self._terminals[i] = a terminal was received at time i
        self._terminals = np.zeros(max_replay_buffer_size, dtype='uint8')
        self._top = 0
        self._size = 0
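The buffer above is a ring buffer: _top is the write cursor and _size saturates at capacity. The matching add_sample would look roughly like this (method name as in the common SimpleReplayBuffer implementations):

    def add_sample(self, observation, action, reward, terminal,
                   next_observation):
        self._observations[self._top] = observation
        self._actions[self._top] = action
        self._rewards[self._top] = reward
        self._terminals[self._top] = terminal
        self._next_obs[self._top] = next_observation
        # Advance the write cursor, wrapping around once the buffer is full.
        self._top = (self._top + 1) % self._max_buffer_size
        self._size = min(self._size + 1, self._max_buffer_size)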
Example #12
    def __getstate__(self):
        d = Serializable.__getstate__(self)
        global load_params
        if load_params:
            # Store the current parameter values alongside the constructor
            # arguments captured by Serializable.
            d["params"] = self.get_param_values()
        return d
Example #13
    def __init__(self, wrapped_env):
        Serializable.quick_init(self, locals())
        self._wrapped_env = wrapped_env
Example #14
    def __setstate__(self, d):
        Serializable.__setstate__(self, d)
        self._obs_mean = d["_obs_mean"]
        self._obs_var = d["_obs_var"]
Example #15
    def __getstate__(self):
        d = Serializable.__getstate__(self)
        d["_obs_mean"] = self._obs_mean
        d["_obs_var"] = self._obs_var
        return d
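Examples #14 and #15 extend the base __getstate__/__setstate__ so the learned normalization statistics survive pickling along with the constructor arguments. A quick round-trip demo (SomeEnv is a hypothetical placeholder for any wrapped environment):

import pickle

env = normalize(SomeEnv(), normalize_obs=True)  # SomeEnv is hypothetical
env.reset()
env.step(env.action_space.sample())  # updates _obs_mean / _obs_var
clone = pickle.loads(pickle.dumps(env))
# clone is rebuilt from the stored constructor arguments, and its
# _obs_mean / _obs_var are restored by __setstate__ above.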