Ejemplo n.º 1
0
    def build(self):
        """Create the input placeholders and the GMM-backed action ops.

        Sets the following attributes on the instance:

        * ``_observations_ph``: float32 placeholder, shape ``(None, Ds)``.
        * ``_latents_ph``: float32 placeholder, shape ``(None, Da)``.
        * ``sample_z``: scalar int32 tensor from ``tf.random_uniform`` with
          ``minval=0`` and ``maxval=K``.
        * ``distribution``: a ``GMM`` conditioned on the observations
          placeholder, constructed inside the ``self.name`` variable scope
          with ``reuse=tf.AUTO_REUSE``.
        * ``_actions``: ``distribution.x_t`` with gradients stopped, passed
          through ``tanh`` when ``self._squash`` is truthy.
        """
        observations_shape = (None, self._Ds)
        self._observations_ph = tf.placeholder(
            tf.float32, shape=observations_shape, name='observations')

        latents_shape = (None, self._Da)
        self._latents_ph = tf.placeholder(
            tf.float32, shape=latents_shape, name='latents')

        self.sample_z = tf.random_uniform(
            shape=[], minval=0, maxval=self._K, dtype=tf.int32)

        # TODO.code_consolidation:
        # `self.distribution` is used very differently from the
        # `LatentSpacePolicy`s distribution. `self.actions_for` is not used
        # here because attributes such as `self.distribution.mus_t` need to
        # be accessed manually.
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            gmm_kwargs = dict(
                K=self._K,
                hidden_layers_sizes=self._hidden_layers,
                Dx=self._Da,
                cond_t_lst=(self._observations_ph,),
                reg=self._reg,
            )
            self.distribution = GMM(**gmm_kwargs)

        # Gradients are blocked from flowing back through the sampled values.
        unsquashed = tf.stop_gradient(self.distribution.x_t)
        if self._squash:
            self._actions = tf.tanh(unsquashed)
        else:
            self._actions = unsquashed
Ejemplo n.º 2
0
    def actions_for(self, observations, latents=None,
                    name=None, reuse=tf.AUTO_REUSE,
                    with_log_pis=False, regularize=False):
        """Build action ops for `observations` from a freshly created GMM.

        Args:
            observations: Conditioning input; passed to `GMM` as the single
                element of `cond_t_lst`.
            latents: Not used by this method.
            name: Variable scope name. Falls back to `self.name` when falsy.
            reuse: Forwarded to `tf.variable_scope` as its `reuse` argument.
            with_log_pis: When truthy, also return the log-probabilities.
            regularize: Not used by this method.

        Returns:
            `actions` alone, or the tuple `(actions, log_pis)` when
            `with_log_pis` is truthy. `actions` is the GMM's `x_t` with
            gradients stopped, passed through `tanh` when `self._squash`
            is truthy. `log_pis` is the GMM's `log_p_t`, reduced by
            `self._squash_correction(...)` of the unsquashed actions when
            `self._squash` is truthy.
        """
        scope_name = name if name else self.name

        with tf.variable_scope(scope_name, reuse=reuse):
            gmm = GMM(
                K=self._K,
                hidden_layers_sizes=self._hidden_layers,
                Dx=self._Da,
                cond_t_lst=(observations,),
                reg=self._reg,
            )

        unsquashed = tf.stop_gradient(gmm.x_t)
        if self._squash:
            actions = tf.tanh(unsquashed)
        else:
            actions = unsquashed

        # TODO: should always return same shape out
        # Figure out how to make the interface for `log_pis` cleaner
        if not with_log_pis:
            return actions

        # TODO.code_consolidation: should come from log_pis_for
        log_pis = gmm.log_p_t
        if self._squash:
            log_pis -= self._squash_correction(unsquashed)

        return actions, log_pis
Ejemplo n.º 3
0
 def get_distribution_for(self, obs_pl: tf.Tensor, reuse: bool) -> GMM:
     """Construct a GMM conditioned on `obs_pl` in this object's scope.

     Args:
         obs_pl: Conditioning tensor; passed to `GMM` as the single
             element of `cond_t_lst`.
         reuse: Forwarded to `tf.variable_scope` as its `reuse` argument.

     Returns:
         The `GMM` instance created inside the `self.name` variable scope.
     """
     scope = tf.variable_scope(self.name, reuse=reuse)
     with scope:
         mixture = GMM(
             K=self._K,
             hidden_layers_sizes=self._hidden_layers,
             Dx=self._Da,
             cond_t_lst=(obs_pl,),
             reg=self._reg,
         )
     return mixture
Ejemplo n.º 4
0
    def get_distribution_for(self, obs_t, reuse=False):
        """Build a GMM conditioned on `obs_t` under the 'policy' scope.

        Args:
            obs_t: Conditioning input; wrapped in a one-element list and
                passed to `GMM` as `cond_t_lst`.
            reuse: Forwarded to `tf.variable_scope` as its `reuse` argument.

        Returns:
            The newly constructed `GMM` instance.
        """
        with tf.variable_scope('policy', reuse=reuse):
            return GMM(
                K=self._K,
                hidden_layers_sizes=self._hidden_layers,
                Dx=self._Da,
                cond_t_lst=[obs_t],
                reg=self._reg,
            )