# Presumed imports for the snippets below. Each snippet comes from a
# different codebase, so the exact import origins of the bare names are an
# assumption; all of them exist under tensorflow_probability.
import math
from numbers import Number

import numpy as np
import tensorflow as tf
from tensorflow_probability.python.bijectors import FillScaleTriL
from tensorflow_probability.python.distributions import (
    Categorical, Independent, MixtureSameFamily, MultivariateNormalDiag,
    MultivariateNormalTriL, Normal)
from tensorflow_probability.python.math import scan_associative

# Shorthands used by the Kalman-filter snippets; assumed aliases for the
# TensorFlow linear-algebra ops.
mm = tf.linalg.matmul
mv = tf.linalg.matvec


def step(self, obs):
    """Sample an action from the Gaussian policy and return its log-prob."""
    # Promote a single observation to a batch of one.
    if obs.ndim < 2:
        obs = obs[np.newaxis, :]
    action_mean = self.actor(obs)
    # self.cov_mat holds the (fixed) lower-triangular Cholesky factor.
    dist = MultivariateNormalTriL(loc=action_mean, scale_tril=self.cov_mat)
    action = dist.sample()
    return action.numpy()[0], dist.log_prob(action)

def infer(self, obs, act):
    """Log-prob of the given actions, state value, and policy entropy."""
    if obs.ndim < 2:
        obs = obs[np.newaxis, :]
    action_mean = self.actor(obs)
    dist = MultivariateNormalTriL(loc=action_mean, scale_tril=self.cov_mat)
    action_logprobs = dist.log_prob(act)
    dist_entropy = dist.entropy()
    q_value = self.critic(obs)
    return action_logprobs, tf.squeeze(q_value), dist_entropy
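
# A minimal sketch of the pattern step()/infer() rely on: a state-dependent
# mean with one Cholesky factor shared across the batch. Shapes and values
# below are illustrative assumptions, not taken from the original class.
action_dim = 2
cov_mat = tf.linalg.cholesky(0.25 * tf.eye(action_dim))  # lower-triangular scale
action_mean = tf.zeros([1, action_dim])                  # stand-in for actor(obs)
dist = MultivariateNormalTriL(loc=action_mean, scale_tril=cov_mat)
action = dist.sample()                                   # shape [1, action_dim]
logprob = dist.log_prob(action)                          # shape [1]
entropy = dist.entropy()                                 # shape [1]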

def update(m, P, ell):
    """One Kalman measurement update; accumulates the data log-likelihood.

    H, R, and the observation y are free variables from the enclosing scope;
    mm/mv are the matmul/matvec shorthands defined above.
    """
    # Innovation covariance S = H P H^T + R and predicted observation H m.
    S = H @ mm(P, H, transpose_b=True) + R
    yp = mv(H, m)
    chol = tf.linalg.cholesky(S)
    # Log-likelihood of y under the predictive distribution N(H m, S).
    predicted_dist = MultivariateNormalTriL(yp, chol)
    ell_t = predicted_dist.log_prob(y)
    # Transposed Kalman gain: Kt = S^{-1} H P, i.e. K = P H^T S^{-1} = Kt^T.
    Kt = tf.linalg.cholesky_solve(chol, H @ P)
    m = m + mv(Kt, y - yp, transpose_a=True)
    P = P - mm(Kt, S, transpose_a=True) @ Kt
    ell = ell + ell_t
    return ell, m, P
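
# Hedged driver for update() above, assuming it is defined at a scope where
# H, R, and y resolve as shown; the 2-D random-walk model here is invented
# purely to make the loop executable.
H = tf.constant([[1.0, 0.0]], dtype=tf.float64)   # observe the first state
R = 0.1 * tf.eye(1, dtype=tf.float64)             # observation noise
m = tf.zeros([2], dtype=tf.float64)
P = tf.eye(2, dtype=tf.float64)
ell = tf.constant(0.0, dtype=tf.float64)
for y in tf.constant([[0.10], [0.20], [0.15]], dtype=tf.float64):
    ell, m, P = update(m, P, ell)                 # update() reads y, H, R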

def pkf(lgssm, observations, return_loglikelihood=False, max_parallel=10000):
    """Parallel-in-time Kalman filter built on an associative scan.

    `make_associative_filtering_elements` and `filtering_operator` are
    defined elsewhere in the same codebase.
    """
    with tf.name_scope("parallel_filter"):
        P0, Fs, Qs, H, R = lgssm
        dtype = P0.dtype
        m0 = tf.zeros(tf.shape(P0)[0], dtype=dtype)
        # Depth of the binary combination tree used by the scan.
        max_num_levels = math.ceil(math.log2(max_parallel))
        initial_elements = make_associative_filtering_elements(
            m0, P0, Fs, Qs, H, R, observations)
        final_elements = scan_associative(filtering_operator,
                                          initial_elements,
                                          max_num_levels=max_num_levels)
        if return_loglikelihood:
            # Shift the filtered moments one step back so each entry gives
            # the one-step-ahead prediction p(y_t | y_{1:t-1}).
            filtered_means = tf.concat(
                [tf.expand_dims(m0, 0), final_elements[1][:-1]], axis=0)
            filtered_cov = tf.concat(
                [tf.expand_dims(P0, 0), final_elements[2][:-1]], axis=0)
            predicted_means = mv(Fs, filtered_means)
            predicted_covs = mm(Fs, mm(filtered_cov, Fs, transpose_b=True)) + Qs
            obs_means = mv(H, predicted_means)
            obs_covs = (mm(H, mm(predicted_covs, H, transpose_b=True)) +
                        tf.expand_dims(R, 0))
            dists = MultivariateNormalTriL(obs_means,
                                           tf.linalg.cholesky(obs_covs))
            # TODO: some logic could be added here to avoid handling the
            # covariance of non-nan models, but no impact for GPs.
            logprobs = dists.log_prob(observations)
            # NaN observations (missing data) yield NaN log-probs; zero them
            # out before summing.
            logprobs_without_nans = tf.where(tf.math.is_nan(logprobs),
                                             tf.zeros_like(logprobs),
                                             logprobs)
            total_log_prob = tf.reduce_sum(logprobs_without_nans)
            return final_elements[1], final_elements[2], total_log_prob
        return final_elements[1], final_elements[2]
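
# To see the mechanism pkf relies on in isolation: tfp's scan_associative
# computes all prefix reductions of an associative operator in O(log N)
# parallel depth. Cumulative sum is the simplest instance (the filter instead
# combines tuples of conditional-Gaussian parameters).
xs = tf.range(1, 9, dtype=tf.float32)
prefix_sums = scan_associative(lambda a, b: a + b, xs)
# prefix_sums == [1, 3, 6, 10, 15, 21, 28, 36]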

def set_prior(self,
              loc=0.,
              log_scale=np.log(np.expm1(1)),
              mixture_logits=None):
    r"""Set the prior for the mixture density network.

    loc : Scalar or Tensor with shape `[n_components, event_size]`.
    log_scale : Scalar or Tensor with shape `[n_components, event_size]`
      for the 'none' and 'diag' components, and
      `[n_components, event_size * (event_size + 1) // 2]` for the 'full'
      component.
    mixture_logits : Scalar or Tensor with shape `[n_components]`.
    """
    event_size = self.event_size
    if self.covariance == 'diag':
        scale_shape = [self.n_components, event_size]
        fn = lambda l, s: MultivariateNormalDiag(
            loc=l, scale_diag=tf.nn.softplus(s))
    elif self.covariance == 'none':
        scale_shape = [self.n_components, event_size]
        fn = lambda l, s: Independent(
            Normal(loc=l, scale=tf.math.softplus(s)), 1)
    elif self.covariance == 'full':
        scale_shape = [self.n_components, event_size * (event_size + 1) // 2]
        fn = lambda l, s: MultivariateNormalTriL(
            loc=l,
            scale_tril=FillScaleTriL(diag_shift=1e-5)(tf.math.softplus(s)))
    #
    if isinstance(loc, Number) or tf.rank(loc) == 0:
        loc = tf.fill([self.n_components, self.event_size], loc)
    #
    if isinstance(log_scale, Number) or tf.rank(log_scale) == 0:
        log_scale = tf.fill(scale_shape, log_scale)
    #
    if mixture_logits is None:
        p = 1. / self.n_components
        mixture_logits = np.log(p / (1. - p))
    if isinstance(mixture_logits, Number) or tf.rank(mixture_logits) == 0:
        mixture_logits = tf.fill([self.n_components], mixture_logits)
    #
    loc = tf.cast(loc, self.dtype)
    log_scale = tf.cast(log_scale, self.dtype)
    mixture_logits = tf.cast(mixture_logits, self.dtype)
    self._prior = MixtureSameFamily(
        components_distribution=fn(loc, log_scale),
        mixture_distribution=Categorical(logits=mixture_logits),
        name="prior")
    return self
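
# Standalone sketch of the prior set_prior() builds for the 'full' branch,
# with illustrative sizes (n_components=3, event_size=2) replacing the class
# attributes. Note softplus(log(expm1(1))) == 1, so the default log_scale
# yields unit scales before the triangular fill.
n_components, event_size = 3, 2
tril_size = event_size * (event_size + 1) // 2
log_scale = tf.fill([n_components, tril_size],
                    tf.constant(np.log(np.expm1(1.)), tf.float32))
prior = MixtureSameFamily(
    components_distribution=MultivariateNormalTriL(
        loc=tf.zeros([n_components, event_size]),
        scale_tril=FillScaleTriL(diag_shift=1e-5)(tf.math.softplus(log_scale))),
    mixture_distribution=Categorical(logits=tf.zeros([n_components])),
    name="prior")
z = prior.sample(5)  # shape [5, event_size]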

def __init__(self,
             loc,
             scale,
             logits=None,
             probs=None,
             covariance_type='diag',
             trainable=False,
             validate_args=False,
             allow_nan_stats=True,
             name=None):
    kw = dict(validate_args=validate_args, allow_nan_stats=allow_nan_stats)
    self._trainable = bool(trainable)
    self._llk_history = []
    if trainable:
        loc = tf.Variable(loc, trainable=True, name='loc')
        scale = tf.Variable(scale, trainable=True, name='scale')
        if logits is not None:
            logits = tf.Variable(logits, trainable=True, name='logits')
        if probs is not None:
            probs = tf.Variable(probs, trainable=True, name='probs')
    ### initialize mixture Categorical
    mixture = Categorical(logits=logits, probs=probs,
                          name="MixtureWeights", **kw)
    n_components = mixture._num_categories()
    ### initialize Gaussian components
    covariance_type = str(covariance_type).lower().strip()
    if name is None:
        name = 'Mixture%sGaussian' % \
            (covariance_type.capitalize()
             if covariance_type != 'none' else 'Independent')
    ## create the components
    if covariance_type == 'diag':
        if tf.rank(scale) == 0:  # scalar
            extra_kw = dict(scale_identity_multiplier=scale)
        else:  # a tensor
            extra_kw = dict(scale_diag=scale)
        components = MultivariateNormalDiag(loc=loc, name=name,
                                            **kw, **extra_kw)
    elif covariance_type in ('tril', 'full'):
        if tf.rank(scale) == 1 or scale.shape[-1] != scale.shape[-2]:
            # `scale` is a flattened triangle; expand it to a lower-triangular
            # matrix with a small positive shift on the diagonal.
            scale_tril = FillScaleTriL(diag_shift=np.array(
                1e-5,
                tf.convert_to_tensor(scale).dtype.as_numpy_dtype()))
            scale = scale_tril(scale)
        components = MultivariateNormalTriL(loc=loc, scale_tril=scale,
                                            name=name, **kw)
    elif covariance_type == 'none':
        components = Independent(distribution=Normal(loc=loc, scale=scale, **kw),
                                 reinterpreted_batch_ndims=1,
                                 name=name)
    else:
        raise ValueError("No support for covariance_type: '%s'" %
                         covariance_type)
    ### validate the n_components
    assert components.batch_shape[-1] == int(n_components), \
        "Number of components mismatch, components:%d, mixture:%d" % \
        (components.batch_shape[-1], int(n_components))
    super().__init__(mixture_distribution=mixture,
                     components_distribution=components,
                     name=name,
                     **kw)
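
# Illustrative reconstruction of what the constructor assembles for the
# 'diag' branch (the snippet does not show the class name, so the class
# wrapper is omitted); shapes are assumptions: 3 components in 2-D.
weights = Categorical(logits=tf.zeros([3]), name="MixtureWeights")
comps = MultivariateNormalDiag(loc=tf.zeros([3, 2]), scale_diag=tf.ones([3, 2]))
gmm = MixtureSameFamily(mixture_distribution=weights,
                        components_distribution=comps)
x = gmm.sample(10)     # shape [10, 2]
lp = gmm.log_prob(x)   # shape [10]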