def _create_graph(self):
        Dx = self._Dx

        if len(self._cond_t_lst) == 0:
            mu_and_logsig_t = tf.get_variable(
                'params', self._layer_sizes[-1],
                initializer=tf.random_normal_initializer(0, 0.1)
            )
        else:
            mu_and_logsig_t = mlp(
                inputs=self._cond_t_lst,
                layer_sizes=self._layer_sizes,
                output_nonlinearity=None,
            )  # ... x 2*Dx

        self._mu_t = mu_and_logsig_t[..., :Dx]
        self._log_sig_t = tf.clip_by_value(
            mu_and_logsig_t[..., Dx:], LOG_SIG_CAP_MIN, LOG_SIG_CAP_MAX)

        # TensorFlow's multivariate normal distribution supports reparameterization.
        ds = tf.contrib.distributions
        dist = ds.MultivariateNormalDiag(
            loc=self._mu_t, scale_diag=tf.exp(self._log_sig_t))
        x_t = dist.sample()
        if not self._reparameterize:
            # Treat the sample as a constant: no gradients flow into mu/log_sig.
            x_t = tf.stop_gradient(x_t)
        log_pi_t = dist.log_prob(x_t)

        self._dist = dist
        self._x_t = x_t
        self._log_pi_t = log_pi_t
        
        # Quadratic penalty on means and log-stddevs keeps the Gaussian well-scaled.
        reg_loss_t = self._reg * 0.5 * tf.reduce_mean(self._log_sig_t ** 2)
        reg_loss_t += self._reg * 0.5 * tf.reduce_mean(self._mu_t ** 2)
        self._reg_loss_t = reg_loss_t
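For context, the sampling path above relies on the reparameterization trick: a draw from N(mu, diag(exp(log_sig))^2) is rewritten as mu + exp(log_sig) * eps with parameter-free noise eps, so gradients reach mu and log_sig through the sample. A minimal NumPy sketch of the idea (all names here are illustrative, not from this codebase):

import numpy as np

def reparameterized_sample(mu, log_sig, rng=np.random):
    # x = mu + exp(log_sig) * eps with eps ~ N(0, I), so x is a
    # differentiable function of (mu, log_sig); the noise is external.
    eps = rng.standard_normal(mu.shape)
    return mu + np.exp(log_sig) * eps

mu = np.zeros(3)
log_sig = np.log(0.5 * np.ones(3))
x = reparameterized_sample(mu, log_sig)

# Diagonal-Gaussian log-density at x, the quantity dist.log_prob(x_t) returns:
log_pi = -0.5 * np.sum(((x - mu) / np.exp(log_sig)) ** 2
                       + 2.0 * log_sig + np.log(2.0 * np.pi))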
Example #2
    def _create_p_xz_params(self):
        K = self._K
        Dx = self._Dx

        if len(self._cond_t_lst) == 0:
            w_and_mu_and_logsig_t = tf.get_variable(
                'params',
                self._layer_sizes[-1],
                initializer=tf.random_normal_initializer(0, 0.1))

        else:
            w_and_mu_and_logsig_t = mlp(
                inputs=self._cond_t_lst,
                layer_sizes=self._layer_sizes,
                output_nonlinearity=None,
            )  # ... x K*(2*Dx + 1)

        w_and_mu_and_logsig_t = tf.reshape(w_and_mu_and_logsig_t,
                                           shape=(-1, K, 2 * Dx + 1))

        log_w_t = w_and_mu_and_logsig_t[..., 0]
        mu_t = w_and_mu_and_logsig_t[..., 1:1 + Dx]
        log_sig_t = w_and_mu_and_logsig_t[..., 1 + Dx:]

        log_sig_t = tf.clip_by_value(log_sig_t, LOG_SIG_CAP_MIN,
                                     LOG_SIG_CAP_MAX)

        return log_w_t, mu_t, log_sig_t
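For context, the reshape above packs one mixture log-weight plus 2*Dx Gaussian parameters per component, which is why the last axis has size 2*Dx + 1. A small NumPy sketch of the same unpacking, with made-up sizes:

import numpy as np

N, K, Dx = 4, 3, 2                            # batch size, mixture components, action dim
flat = np.random.randn(N, K * (2 * Dx + 1))   # network output: ... x K*(2*Dx + 1)

packed = flat.reshape(-1, K, 2 * Dx + 1)
log_w = packed[..., 0]                        # (N, K)     mixture log-weights
mu = packed[..., 1:1 + Dx]                    # (N, K, Dx) component means
log_sig = packed[..., 1 + Dx:]                # (N, K, Dx) component log-stddevs
assert log_w.shape == (N, K) and mu.shape == log_sig.shape == (N, K, Dx)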
Example #3
    def _create_graph(self):

        logits = mlp(
            inputs=self._cond_t_lst,
            layer_sizes=self._layer_sizes,
            output_nonlinearity=tf.nn.sigmoid,
        )
        # Slice the concatenated sigmoid outputs into one (N, Dx[i]) block per action.
        l_logits = [
            logits[:, self._slice[i]:self._slice[i + 1]]
            for i in range(self._actnum)
        ]
        self._logits = []
        for i in range(self._actnum):
            # Indicator matrices: up_tri[j, k] = 1 for j <= k; low_tri[j, k] = 1 for j > k.
            up_tri = tf.constant(
                np.triu(np.ones((self._Dx[i], self._Dx[i]), dtype=np.float32)))
            low_tri = tf.constant(
                np.tril(np.ones((self._Dx[i], self._Dx[i]), dtype=np.float32),
                        -1))
            # logit_k = sum_{j <= k} log p_j + sum_{j > k} log(1 - p_j)
            ordinal_logits = (
                tf.matmul(tf.log(l_logits[i] + EPS), up_tri) +
                tf.matmul(tf.log(1 - l_logits[i] + EPS), low_tri))
            self._logits.append(ordinal_logits)
        self._probs = [tf.nn.softmax(logit) for logit in self._logits]
        self._logprobs = [tf.nn.log_softmax(logit) for logit in self._logits]
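For context, the triangular matmuls implement an ordinal (cumulative-link) parameterization: bin k's logit accumulates log p_j for every threshold j <= k and log(1 - p_j) for every j > k, so the resulting softmax respects the ordering of the bins. A NumPy sketch with illustrative values:

import numpy as np

EPS = 1e-6
D = 4                                         # ordinal bins for one action dimension
p = np.array([[0.9, 0.7, 0.4, 0.1]])          # (1, D) per-threshold sigmoid outputs

up_tri = np.triu(np.ones((D, D), dtype=np.float32))       # up_tri[j, k] = 1 for j <= k
low_tri = np.tril(np.ones((D, D), dtype=np.float32), -1)  # low_tri[j, k] = 1 for j > k

# logit_k = sum_{j <= k} log p_j + sum_{j > k} log(1 - p_j)
ordinal_logits = np.log(p + EPS) @ up_tri + np.log(1 - p + EPS) @ low_tri

shifted = ordinal_logits - ordinal_logits.max(axis=-1, keepdims=True)
probs = np.exp(shifted) / np.exp(shifted).sum(axis=-1, keepdims=True)  # softmax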
Example #4
    def get_output_for(self, *inputs, reuse=False):
        with tf.variable_scope(self._name, reuse=reuse):
            value_t = mlp(
                inputs=inputs,
                output_nonlinearity=None,
                layer_sizes=self._layer_sizes,
            )  # output shape: (N,)

        return value_t
Example #5
    def _create_graph(self):

        logits = mlp(
            inputs=self._cond_t_lst,
            layer_sizes=self._layer_sizes,
            output_nonlinearity=None,
        )
        self._logits = logits
        self._probs = tf.nn.softmax(logits)
        self._logprobs = tf.nn.log_softmax(logits)
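A side note on tf.nn.log_softmax: it computes log-probabilities in a numerically stable way, unlike tf.log(tf.nn.softmax(...)), which underflows to log(0) for very negative logits. A NumPy sketch of the stable form:

import numpy as np

def log_softmax(logits, axis=-1):
    # Shift by the max first: exp() then never overflows, and the
    # summed exponentials include a term equal to 1, so log() never sees 0.
    shifted = logits - logits.max(axis=axis, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))

logits = np.array([[1000.0, 0.0, -1000.0]])
print(log_softmax(logits))  # finite: [0., -1000., -2000.]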
Example #6
    def get_output_for(self, *inputs, **kwargs):
        # Python 2 has no keyword-only arguments, so `reuse` arrives via **kwargs.
        reuse = kwargs.get('reuse', False)
        with tf.variable_scope(self._name, reuse=reuse):
            value_t = mlp(
                inputs=inputs,
                output_nonlinearity=None,
                layer_sizes=self._layer_sizes,
            )  # output shape: (N,)

        return value_t
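For context, both this variant and Example #4 rely on tf.variable_scope reuse so that repeated calls share one set of weights. A minimal standalone sketch, assuming the TensorFlow 1.x graph-mode API these snippets use (value_net and the shapes are made up for illustration):

import tensorflow as tf  # assumes TensorFlow 1.x

def value_net(obs, name, reuse=False):
    with tf.variable_scope(name, reuse=reuse):
        dim = obs.get_shape().as_list()[-1]
        w = tf.get_variable('w', shape=(dim, 1))
        return tf.matmul(obs, w)

obs1 = tf.placeholder(tf.float32, (None, 4))
obs2 = tf.placeholder(tf.float32, (None, 4))
v1 = value_net(obs1, 'vf')               # first call creates vf/w
v2 = value_net(obs2, 'vf', reuse=True)   # second call reuses the same vf/w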