def _build_graph(self, **kwargs):
    """Extend the function-approximator graph with loss and update ops.

    The parent graph is built first through ``cls._build_graph``; the
    loss comes from the user-defined ``_build_loss`` and the update op
    from ``_build_apply_gradients``.

    Added attributes:
        ph_y, ph_w, ph_lr
    Added methods:
        _compute_loss, _apply_gradients
    """
    # Function approximator first; the code below reads ph_x, ts_yh and
    # ts_vars from self after this call.
    cls._build_graph(self, **kwargs)

    # Target placeholder and per-sample weighting placeholder.
    self.ph_y = tf.placeholder(dtype=tf_float,
                               shape=[None, self.y_dim],
                               name="y")
    self.ph_w = tf.placeholder(dtype=tf_float, shape=[None], name='w')

    # User-defined loss of the prediction against the weighted targets.
    loss_op = self._build_loss(self.ts_yh, self.ph_y, self.ph_w)

    # Pair every gradient with its variable: a list of (grad, var) tuples.
    grads = U.gradients(loss_op, self.ts_vars)
    grads_and_vars = [(grad, var) for grad, var in zip(grads, self.ts_vars)]

    # The learning rate is fed at call time through a scalar placeholder.
    self.ph_lr = tf.placeholder(dtype=tf_float,
                                shape=[],
                                name="learning_rate")
    update_op = self._build_apply_gradients(grads_and_vars, self.ph_lr)

    # Wrap both ops as callables over their input placeholders.
    data_inputs = [self.ph_x, self.ph_y, self.ph_w]
    self._compute_loss = U.function(data_inputs, loss_op)
    self._apply_gradients = U.function(data_inputs + [self.ph_lr],
                                       update_op)
Exemple #2
0
 def _build_graph(self, **bg_kwargs):
     """Build the loss op and expose loss/gradient evaluation as callables.

     ``_build_loss_op`` supplies the loss tensor together with the
     placeholders it needs; both ``_compute_loss`` and ``_compute_grad``
     are wrapped over those same placeholders.
     """
     loss_op, placeholders = self._build_loss_op(**bg_kwargs)
     self._compute_loss = U.function(placeholders, loss_op)

     # Gradients of the loss with respect to every variable in _ts_vars.
     raw_grads = U.gradients(loss_op, self._ts_vars)

     # A None gradient would make the run attempt to fetch None, so it is
     # replaced by zeros shaped like the corresponding variable.
     filled_grads = []
     for var, grad in zipsame(self._ts_vars, raw_grads):
         if grad is None:
             grad = tf.zeros_like(var)
         filled_grads.append(grad)
     self._compute_grad = U.function(placeholders, filled_grads)
Exemple #3
0
 def kl(self, other, x, reversesd=False):
     """Evaluate the op built by ``build_kl`` for self and other at x.

     The op is ``build_kl(other, self)`` by default and
     ``build_kl(self, other)`` when ``reversesd`` is true.  The compiled
     function is stored in ``_kl_cache`` under a key made of ``id(other)``
     and ``reversesd``; ``x`` is fed to both ``self.ph_x`` and
     ``other.ph_x``.
     """
     assert type(other) == type(self)
     cache_key = str(id(other)) + str(reversesd)
     # NOTE(review): a missing key is expected to read back as None, so
     # _kl_cache is presumably a defaulting mapping -- confirm.
     if self._kl_cache[cache_key] is None:
         if reversesd:
             ts_kl = self.build_kl(self, other)
         else:
             ts_kl = self.build_kl(other, self)
         kl_fn = U.function([self.ph_x, other.ph_x], ts_kl)

         def _kl_shared_input(_x):
             # the same input goes to both placeholders
             return kl_fn(_x, _x)

         self._kl_cache[cache_key] = _kl_shared_input
     return self._kl_cache[cache_key](x)
Exemple #4
0
    def _build_graph(self, **kwargs):
        """Build the policy graph on top of tfFunctionApproximator.

        tfFunctionApproximator is treated as the stochastic map of the
        policy (input ph_x, output ts_yh); this method adds the extra
        callables required by Policy.

        Added attributes:
            _pi, _pid, _logp, _fvp
        """
        # Base graph.  The code below reads ph_x, ph_y, _yh, _sh_vars,
        # ts_pid and ts_logp from self once this call returns.
        tfFunctionApproximator._build_graph(self, **kwargs)

        # Callables for the conditional distribution.
        self._pi = self._yh
        self._pid = U.function([self.ph_x], self.ts_pid)
        self._logp = U.function([self.ph_x, self.ph_y], self.ts_logp)

        # Fisher-vector-product operator (this depends only on self).
        # NOTE(review): build_flat_ph presumably returns one flat
        # placeholder plus its per-variable pieces -- confirm in Shaper.
        ph_g, ts_vec_parts = self._sh_vars.build_flat_ph()
        ts_self_kl = self.build_kl(self, self, p1_sg=True)
        # grad to the 2nd arg of KL
        ts_kl_grads = U.gradients(ts_self_kl, self.ts_vars)

        # Inner product between the KL gradient and the fed vector.
        ts_products = []
        for kl_grad, vec_part in zipsame(ts_kl_grads, ts_vec_parts):
            ts_products.append(tf.reduce_sum(kl_grad * vec_part))
        ts_inner_prod = tf.add_n(ts_products)

        # Fisher (information matrix) and vector product, flattened into
        # one continuous vector.
        ts_fvp_parts = U.gradients(ts_inner_prod, self.ts_vars)
        ts_fvp_flat = tf.concat(
            [tf.reshape(part, [-1]) for part in ts_fvp_parts], axis=-1)
        self._fvp = U.function([self.ph_x, ph_g], ts_fvp_flat)
Exemple #5
0
    def _build_graph(self, **kwargs):
        """Build the mapping graph via the user-provided _build_func_apprx.

        The input placeholder is passed through the normalizer ops and
        then through ``_build_func_apprx`` (which receives ``**kwargs``).
        After that, a Shaper over ``self.ts_vars`` is attached as
        ``_sh_vars`` for convenient manipulation of the tf.Variables
        inside the graph.

        Added attributes:
            ph_x, ts_nor_x, ts_yh, _yh, _sh_vars
        """
        # Input placeholder.
        ts_input = tf.placeholder(dtype=tf_float,
                                  shape=[None, self.x_dim],
                                  name="input")
        self.ph_x = ts_input

        # Normalizer ops for whitening the input.
        ts_whitened = self._nor.build_nor_ops(ts_input)
        self.ts_nor_x = ts_whitened

        # Parameterized function approximator and its callable wrapper.
        ts_output = self._build_func_apprx(ts_whitened, **kwargs)
        self.ts_yh = ts_output
        self._yh = U.function([ts_input], ts_output)

        # Shaper of the trainable variables, for transforming between
        # contiguous and list representations.
        self._sh_vars = U.Shaper(self.ts_vars)
Exemple #6
0
        def _build_dist(self, ts_nor_x, ph_y):
            """Build the mean, log-std, sampling and log-probability ops.

            Added attributes:
                ts_mean, _ts_logstd, _ts_stop_std_grad, ts_logstd,
                _std, _set_logstd, _set_stop_std_grad, ts_noise

            Returns:
                (ts_pi, ts_logp, ts_pid): the noisy output (mean + noise),
                the op from ``_build_logp`` for ``ph_y``, and the mean.
            """
            # Mean: defined by the tfFunctionApproximator.
            self.ts_mean = cls._build_func_apprx(self, ts_nor_x)

            # Log-std variable, plus a non-trainable boolean switch that
            # selects whether gradients flow into it.
            logstd_init = tf.constant_initializer(self._init_logstd)
            self._ts_logstd = tf.get_variable(
                'logstd', shape=[self.y_dim], initializer=logstd_init)
            self._ts_stop_std_grad = tf.get_variable(
                'stop_std_grad', initializer=tf.constant(False),
                trainable=False)

            def _blocked_logstd():
                # branch taken when the stop-gradient switch is on
                return tf.stop_gradient(self._ts_logstd)

            def _plain_logstd():
                return self._ts_logstd

            ts_switched_logstd = tf.cond(self._ts_stop_std_grad,
                                         true_fn=_blocked_logstd,
                                         false_fn=_plain_logstd)

            # Clamp from below so the distribution does not degenerate.
            ts_logstd_floor = tf.to_float(np.log(self._min_std))
            self.ts_logstd = tf.maximum(ts_logstd_floor, ts_switched_logstd)
            ts_std = tf.exp(self.ts_logstd)
            self._std = U.function([], ts_std)
            self._set_logstd = U.build_set([self._ts_logstd])
            self._set_stop_std_grad = U.build_set([self._ts_stop_std_grad])

            # pi: the mean perturbed by zero-mean normal noise with
            # standard deviation ts_std.
            self.ts_noise = tf.random_normal(
                tf.shape(ts_std), stddev=ts_std, seed=self.seed)
            ts_pi = self.ts_mean + self.ts_noise
            # pid: the mean without noise.
            ts_pid = self.ts_mean

            # logp
            ts_logp = self._build_logp(
                self.y_dim, ph_y, self.ts_mean, self.ts_logstd)
            return ts_pi, ts_logp, ts_pid