def fvp0(self, xs, ys, g, **kwargs):
    """Compute F(self.pi) * g.

    F is the Fisher information matrix defined by the average over xs,
    and g is a flat np.ndarray in the same shape as self.variable.
    Returns the product as a flat np.ndarray.
    """
    g_parts = unflatten(g, shapes=self.var_shapes)
    products = self.ts_fvp0(array_to_ts(xs), array_to_ts(ys),
                            array_to_ts(g_parts), **kwargs)
    return flatten([p.numpy() for p in products])
def fvp(self, xs, g, **kwargs):
    """Fisher-vector product.

    Returns the product between a vector g (in the same format as
    self.variable) and the Fisher information defined by the average
    over xs, as a flat np.ndarray.
    """
    g_parts = unflatten(g, shapes=self.var_shapes)
    products = self.ts_fvp(array_to_ts(xs), array_to_ts(g_parts), **kwargs)
    return flatten([p.numpy() for p in products])
def exp_fun(self, xs, As, bs, cs, canonical=True, diagonal_A=True):
    """Expected quadratic function value.

    If canonical is True, computes E[ 0.5 y'*A*y + b'y + c ];
    otherwise computes E[ 0.5 (y-m)'*A*(y-m) + b'*y + c ].
    Returns the result as a numpy value.
    """
    ts_args = (array_to_ts(xs), array_to_ts(As),
               array_to_ts(bs), array_to_ts(cs))
    return self.ts_exp_fun(*ts_args, canonical, diagonal_A).numpy()
def predict_w_noise(self, xs, stochastic=True, **kwargs):
    """Predict outputs and their means for xs.

    Returns:
        (ys, ms): sampled outputs and their means, as numpy arrays.

    NOTE(review): despite the name, the noise component (ys - ms) is
    NOT returned. The original body computed it into an unused local
    (`ns = ys - ms`), which was dead code and has been removed; the
    returned values are unchanged.
    """
    ts_ys, ts_ms, _ = self.ts_predict_all(array_to_ts(xs),
                                          stochastic=stochastic, **kwargs)
    return ts_to_array(ts_ys), ts_to_array(ts_ms)
def __init__(self, x_shape, y_shape, name='tf_gaussian_policy',
             init_lstd=-1,    # initial log-std; scalar or broadcastable to y_shape
             min_std=1e-12,   # std floor; stored in log space as a constant
             # new attributes
             **kwargs):
    """Gaussian policy with a learnable log-standard-deviation variable.

    init_lstd is broadcast to y_shape and stored as a tf.Variable;
    min_std is converted to log space and kept as a tf constant floor.
    NOTE(review): the log-std variables are created BEFORE
    super().__init__ — presumably so the parent constructor can collect
    them; confirm before reordering.
    """
    init_lstd = np.broadcast_to(init_lstd, y_shape)
    self._ts_lstd = tf.Variable(array_to_ts(init_lstd), dtype=tf_float)
    self._ts_min_lstd = tf.constant(np.log(min_std), dtype=tf_float)
    super().__init__(x_shape, y_shape, name=name, **kwargs)
    # cache for mean-variable shapes; populated lazily elsewhere
    self._mean_var_shapes = None
def update(self, f, w_or_logq, update_nor=True, **kwargs):
    """Update the function with Monte-Carlo samples.

    Args:
        f: sampled function values.
        w_or_logq: importance weight or the log probability of the
            sampling distribution (treated as weights w when
            self._use_log_loss is True).
        update_nor: whether to update the normalizer using the current
            sample.
    """
    super().update(**kwargs)
    # Biased mode: refresh the normalizer BEFORE normalizing f, so the
    # current sample influences its own normalization.
    if self._biased:
        self._nor.update(f)
    f_normalized = self._nor.normalize(f)
    # cv
    if self._use_log_loss:
        # ts_w_or_logq is w
        # NOTE(review): source indentation was lost; this assert may
        # belong outside the `if` — confirm against the original file.
        assert np.all(w_or_logq >= 0)
    assert f_normalized.shape == w_or_logq.shape
    # these are treated as constants
    self._ts_f = array_to_ts(f_normalized)
    self._ts_w_or_logq = array_to_ts(w_or_logq)
    # Unbiased mode: the normalizer is updated only AFTER f was
    # normalized with the old statistics.
    if not self._biased and update_nor:
        self._nor.update(f)
def kl(self, other, xs, reversesd=False, **kwargs):
    """Return the KL divergence for each data point in the batch xs.

    NOTE: the keyword is spelled `reversesd` (sic); it is part of the
    public interface and is forwarded by name to ts_kl, so it is kept
    unchanged for backward compatibility.
    """
    ts_divergences = self.ts_kl(other, array_to_ts(xs), reversesd=reversesd)
    return ts_to_array(ts_divergences)
def logp_grad(self, xs, ys, fs, **kwargs):
    """Gradient of the f-weighted log-probability, flattened to a
    single np.ndarray."""
    grads = self.ts_logp_grad(array_to_ts(xs), array_to_ts(ys),
                              array_to_ts(fs), **kwargs)
    return flatten([g.numpy() for g in grads])
def logp(self, xs, ys, **kwargs):  # override
    """Return log-probabilities of ys given xs as a numpy array."""
    ts_result = self._ts_logp(array_to_ts(xs), array_to_ts(ys), **kwargs)
    return ts_result.numpy()
def exp_grad(self, xs, As, bs, cs, canonical=True, diagonal_A=True):
    """Gradient of the expected quadratic (see exp_fun), flattened to a
    single np.ndarray."""
    ts_args = (array_to_ts(xs), array_to_ts(As),
               array_to_ts(bs), array_to_ts(cs))
    grads = self.ts_exp_grad(*ts_args, canonical, diagonal_A)
    return flatten([g.numpy() for g in grads])
def grad(self, x, **kwargs):
    """Return the gradient at x, flattened to a single np.ndarray."""
    ts_grads = self.ts_grad(array_to_ts(x), **kwargs)
    return flatten(ts_to_array(ts_grads))
def fun(self, x, **kwargs):
    """Evaluate the function at x, returning a numpy value.

    Bug fix: **kwargs were previously passed to ts_to_array rather than
    to self.ts_fun; the sibling `grad` method forwards kwargs to the
    ts_* call, so this now does the same.
    """
    return ts_to_array(self.ts_fun(array_to_ts(x), **kwargs))
def grad(self, xs, **kwargs):
    """Return the derivative with respect to xs as a numpy array."""
    ts_grads = self.ts_grad(array_to_ts(xs), **kwargs)
    return ts_to_array(ts_grads)
def predict(self, xs, **kwargs):
    """Predict outputs for xs, returning a numpy array.

    NOTE(review): `self.__ts_predict` is name-mangled to
    `_<ClassName>__ts_predict`, so the attribute must be defined in this
    exact class. Sibling methods use single-underscore or plain names
    (`_ts_logp`, `ts_predict_all`) — confirm the double underscore is
    intentional.
    """
    return self.__ts_predict(array_to_ts(xs), **kwargs).numpy()