Example #1
 def fvp0(self, xs, ys, g, **kwargs):
     """ Computes F(self.pi)*g, where F is the Fisher information matrix and
     g is a np.ndarray in the same shape as self.variable, were the Fisher
     information defined by the average over xs. """
     gs = unflatten(g, shapes=self.var_shapes)
     ts_fvp = self.ts_fvp0(array_to_ts(xs), array_to_ts(ys), array_to_ts(gs), **kwargs)
     return flatten([v.numpy() for v in ts_fvp])
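All of the wrappers in these examples rely on a few conversion helpers that are not shown. Below is a minimal sketch of plausible implementations, inferred purely from how they are called here; the bodies are assumptions, not the library's actual code.

    import numpy as np
    import tensorflow as tf

    def array_to_ts(x):
        # assumed helper: wrap an array (or nested structure of arrays) as tensors
        return tf.nest.map_structure(tf.convert_to_tensor, x)

    def ts_to_array(t):
        # assumed helper: convert tensors (or nested structures) back to numpy
        return tf.nest.map_structure(lambda v: v.numpy(), t)

    def flatten(arrays):
        # concatenate a list of arrays into a single flat vector
        return np.concatenate([np.reshape(a, -1) for a in arrays])

    def unflatten(v, shapes):
        # split a flat vector back into arrays with the given shapes
        sizes = [int(np.prod(s)) for s in shapes]
        chunks = np.split(v, np.cumsum(sizes)[:-1])
        return [np.reshape(c, s) for c, s in zip(chunks, shapes)]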
Example #2
 def fvp(self, xs, g, **kwargs):
     """ Return the product between a vector g (in the same formast as
     self.variable) and the Fisher information defined by the average
     over xs. """
     gs = unflatten(g, shapes=self.var_shapes)
     ts_fvp = self.ts_fvp(array_to_ts(xs), array_to_ts(gs), **kwargs)
     return flatten([v.numpy() for v in ts_fvp])
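A Fisher-vector product like this is typically consumed by conjugate gradient to solve F*x = g without ever forming F, e.g. for natural gradient steps. A generic sketch follows; the policy, xs, and g names in the usage line are illustrative, not from the snippet.

    import numpy as np

    def conjugate_gradient(fvp, b, iters=10, tol=1e-10):
        # solve F x = b given only the map p -> F p (standard CG)
        x = np.zeros_like(b)
        r, p = b.copy(), b.copy()
        rdotr = r.dot(r)
        for _ in range(iters):
            Fp = fvp(p)
            alpha = rdotr / (p.dot(Fp) + 1e-12)
            x += alpha * p
            r -= alpha * Fp
            new_rdotr = r.dot(r)
            if new_rdotr < tol:
                break
            p = r + (new_rdotr / rdotr) * p
            rdotr = new_rdotr
        return x

    # e.g. natural_g = conjugate_gradient(lambda p: policy.fvp(xs, p), g)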
Example #3
 def exp_fun(self, xs, As, bs, cs, canonical=True, diagonal_A=True):
     """
         If canonical is True, computes
             E[ 0.5 y'*A*y + b'*y + c]
         Else computes
             E[ 0.5 (y-m)'*A*(y-m) + b'*y + c]
     """
     return self.ts_exp_fun(array_to_ts(xs), array_to_ts(As),
                            array_to_ts(bs), array_to_ts(cs),
                            canonical, diagonal_A).numpy()
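For a Gaussian y ~ N(m, S), both expectations have closed forms, which is useful for sanity-checking ts_exp_fun. A single-point numpy sketch (plain arrays here, not the batched tensors used above):

    import numpy as np

    def gaussian_exp_quad(m, S, A, b, c, canonical=True):
        # canonical:  E[0.5 y'*A*y + b'*y + c] = 0.5*(m'*A*m + tr(A*S)) + b'*m + c
        # otherwise:  E[0.5 (y-m)'*A*(y-m) + b'*y + c] = 0.5*tr(A*S) + b'*m + c
        val = 0.5 * np.trace(A @ S) + b @ m + c
        if canonical:
            val += 0.5 * m @ A @ m
        return val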
Example #4
 def predict_w_noise(self, xs, stochastic=True, **kwargs):
     ts_ys, ts_ms, _ = self.ts_predict_all(array_to_ts(xs),
                                           stochastic=stochastic,
                                           **kwargs)
     ys, ms = ts_to_array(ts_ys), ts_to_array(ts_ms)
     ns = ys - ms  # noise realized in the stochastic prediction
     return ys, ns
Example #5
 def __init__(self, x_shape, y_shape, name='tf_gaussian_policy',
              init_lstd=-1, min_std=1e-12,  # new attributes
              **kwargs):
     init_lstd = np.broadcast_to(init_lstd, y_shape)
     self._ts_lstd = tf.Variable(array_to_ts(init_lstd), dtype=tf_float)
     self._ts_min_lstd = tf.constant(np.log(min_std), dtype=tf_float)
     super().__init__(x_shape, y_shape, name=name, **kwargs)
     self._mean_var_shapes = None
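Only the log standard deviation is stored, together with a floor at log(min_std). Presumably the standard deviation is then recovered along these lines; ts_std is an assumed name, not shown in the snippet.

    def ts_std(self):
        # clip the log-std at its floor before exponentiating
        return tf.exp(tf.maximum(self._ts_lstd, self._ts_min_lstd))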
Example #6
    def update(self, f, w_or_logq, update_nor=True, **kwargs):
        """ Update the function with Monte-Carlo samples.

            f: sampled function values
            w_or_logq: importance weight or the log probability of the sampling distribution
            update_nor: whether to update the normalizer using the current sample
        """
        super().update(**kwargs)
        if self._biased:  # biased: the current sample updates its own baseline
            self._nor.update(f)
        f_normalized = self._nor.normalize(f)  # control variate
        if self._use_log_loss:  # in this mode w_or_logq holds importance weights w
            assert np.all(w_or_logq >= 0)
        # these are treated as constants
        assert f_normalized.shape == w_or_logq.shape
        self._ts_f = array_to_ts(f_normalized)
        self._ts_w_or_logq = array_to_ts(w_or_logq)
        if not self._biased and update_nor:  # unbiased: update only after normalizing
            self._nor.update(f)
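The order of the _nor updates is the point here: in the biased mode the baseline is refreshed before normalizing, so the current batch influences its own control variate; in the unbiased mode it is refreshed only afterwards. A hypothetical stand-in for the normalizer, just to make that protocol concrete:

    import numpy as np

    class RunningNormalizer:
        # hypothetical stand-in for self._nor: a running-mean baseline
        def __init__(self):
            self.mean, self.n = 0.0, 0
        def update(self, f):
            self.n += 1
            self.mean += (np.mean(f) - self.mean) / self.n
        def normalize(self, f):
            return f - self.mean  # subtract the baseline (control variate)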
Example #7
 def kl(self, other, xs, reversed=False, **kwargs):
     """ Return the KL divergence for each data point in the batch xs. """
     return ts_to_array(self.ts_kl(other, array_to_ts(xs),
                                   reversed=reversed, **kwargs))
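If the policies are diagonal Gaussians, as in Example #5, the per-point KL that ts_kl computes presumably reduces to the standard closed form. A numpy reference sketch for checking it, with per-point means m0, m1 and standard deviations s0, s1:

    import numpy as np

    def diag_gaussian_kl(m0, s0, m1, s1):
        # KL( N(m0, diag(s0^2)) || N(m1, diag(s1^2)) ), summed over the last axis
        return np.sum(np.log(s1 / s0) + (s0**2 + (m0 - m1)**2) / (2.0 * s1**2) - 0.5,
                      axis=-1)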
Example #8
 def logp_grad(self, xs, ys, fs, **kwargs):
     ts_grad = self.ts_logp_grad(array_to_ts(xs), array_to_ts(ys),
                                 array_to_ts(fs), **kwargs)
     return flatten([v.numpy() for v in ts_grad])
Example #9
 def logp(self, xs, ys, **kwargs):  # override
     return self._ts_logp(array_to_ts(xs), array_to_ts(ys), **kwargs).numpy()
Example #10
 def exp_grad(self, xs, As, bs, cs, canonical=True, diagonal_A=True):
     """ See exp_fun. """
     ts_grad = self.ts_exp_grad(array_to_ts(xs), array_to_ts(As),
                                array_to_ts(bs), array_to_ts(cs),
                                canonical, diagonal_A)
     return flatten([v.numpy() for v in ts_grad])
Example #11
 def grad(self, x, **kwargs):
     return flatten(ts_to_array(self.ts_grad(array_to_ts(x), **kwargs)))
Example #12
 def fun(self, x, **kwargs):
     return ts_to_array(self.ts_fun(array_to_ts(x), **kwargs))
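These numpy-in, numpy-out fun/grad pairs plug directly into numpy-based optimizers. For instance, with a flat variable vector; obj and x0 are illustrative names, not from the snippets:

    from scipy.optimize import minimize

    # fun returns a scalar, grad a flat vector, so they fit scipy's interface
    res = minimize(obj.fun, x0, jac=obj.grad, method='L-BFGS-B')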
Example #13
 def grad(self, xs, **kwargs):
     """ Derivative with respect to xs. """
     return ts_to_array(self.ts_grad(array_to_ts(xs), **kwargs))
Example #14
 def predict(self, xs, **kwargs):
     return self.__ts_predict(array_to_ts(xs), **kwargs).numpy()