# Common imports assumed by the snippets collected below (each snippet is
# excerpted from a different source, so project-specific helpers are not
# reproduced here).
from typing import Optional, Tuple

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions


def create_prior(K, a_p=1, b_p=1, a_gamma=1, b_gamma=1, m_loc=0, g_loc=0.1,
                 m_sigma=3, s_sigma=2, m_nu=0, s_nu=1, m_skew=0, g_skew=0.1,
                 dtype=np.float64):
    return tfd.JointDistributionNamed(dict(
        p=tfd.Beta(dtype(a_p), dtype(b_p)),
        gamma_C=tfd.Gamma(dtype(a_gamma), dtype(b_gamma)),
        gamma_T=tfd.Gamma(dtype(a_gamma), dtype(b_gamma)),
        eta_C=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
        eta_T=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
        # cast s_nu like the other hyperparameters for dtype consistency
        nu=tfd.Sample(tfd.LogNormal(dtype(m_nu), dtype(s_nu)), sample_shape=K),
        sigma_sq=tfd.Sample(tfd.InverseGamma(dtype(m_sigma), dtype(s_sigma)),
                            sample_shape=K),
        loc=lambda sigma_sq: tfd.Independent(
            tfd.Normal(dtype(m_loc), g_loc * tf.sqrt(sigma_sq)),
            reinterpreted_batch_ndims=1),
        skew=lambda sigma_sq: tfd.Independent(
            tfd.Normal(dtype(m_skew), g_skew * tf.sqrt(sigma_sq)),
            reinterpreted_batch_ndims=1),
    ))
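# A minimal usage sketch (assumes eager TF2): `create_prior` returns a
# `tfd.JointDistributionNamed`, so `sample()` yields a dict keyed by the
# variable names above, and `log_prob` scores such a dict.
prior = create_prior(K=5)
draw = prior.sample(seed=1)
print(sorted(draw.keys()))   # all nine variable names
print(prior.log_prob(draw))  # scalar joint log-density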
def _model():
    p = yield Root(tfd.Beta(dtype(1), dtype(1), name="p"))
    gamma_C = yield Root(tfd.Beta(dtype(1), dtype(1), name="gamma_C"))
    gamma_T = yield Root(tfd.Beta(dtype(1), dtype(1), name="gamma_T"))
    eta_C = yield Root(tfd.Dirichlet(np.ones(K, dtype=dtype) / K, name="eta_C"))
    eta_T = yield Root(tfd.Dirichlet(np.ones(K, dtype=dtype) / K, name="eta_T"))
    loc = yield Root(tfd.Sample(tfd.Normal(dtype(0), dtype(1)),
                                sample_shape=K, name="loc"))
    nu = yield Root(tfd.Sample(tfd.Uniform(dtype(10), dtype(50)),
                               sample_shape=K, name="nu"))
    phi = yield Root(tfd.Sample(tfd.Normal(dtype(m_phi), dtype(s_phi)),
                                sample_shape=K, name="phi"))
    sigma_sq = yield Root(tfd.Sample(tfd.InverseGamma(dtype(3), dtype(2)),
                                     sample_shape=K, name="sigma_sq"))
    scale = tf.sqrt(sigma_sq)  # sigma_sq is a Tensor here, so use tf.sqrt

    gamma_T_star = compute_gamma_T_star(gamma_C, gamma_T, p)
    eta_T_star = compute_eta_T_star(gamma_C[..., tf.newaxis],
                                    gamma_T[..., tf.newaxis],
                                    eta_C, eta_T, p[..., tf.newaxis],
                                    gamma_T_star[..., tf.newaxis])

    # likelihood (`gamma_*` are probabilities, so pass them as `probs`,
    # not positionally, which Binomial would read as logits)
    y_C = yield mix(nC, eta_C, loc, scale, name="y_C")
    n0C = yield tfd.Binomial(nC, probs=gamma_C, name="n0C")
    y_T = yield mix(nT, eta_T_star, loc, scale, name="y_T")
    n0T = yield tfd.Binomial(nT, probs=gamma_T_star, name="n0T")
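# Hedged sketch of how a generator like `_model` is typically consumed: wrap
# it in `tfd.JointDistributionCoroutine`, with `Root` marking nodes that have
# no upstream dependencies. The toy coroutine below is illustrative only; it
# is not `_model` above (which additionally needs K, dtype, m_phi, s_phi,
# mix, and the compute_* helpers in scope).
Root = tfd.JointDistributionCoroutine.Root

def _toy():
    w = yield Root(tfd.Dirichlet(tf.ones(3), name="w"))
    x = yield tfd.Multinomial(total_count=10., probs=w, name="x")

toy = tfd.JointDistributionCoroutine(_toy)
draw = toy.sample(seed=2)
print(toy.log_prob(draw))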
def create_model(n_C, n_T, K, neg_inf=-10, dtype=np.float64):
    return tfd.JointDistributionNamed(dict(
        p=tfd.Beta(dtype(1), dtype(1)),
        gamma_C=tfd.Gamma(dtype(3), dtype(3)),
        gamma_T=tfd.Gamma(dtype(3), dtype(3)),
        eta_C=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
        eta_T=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
        loc=tfd.Sample(tfd.Normal(dtype(0), dtype(1)), sample_shape=K),
        sigma_sq=tfd.Sample(tfd.InverseGamma(dtype(3), dtype(2)),
                            sample_shape=K),
        y_C=lambda gamma_C, eta_C, loc, sigma_sq: mix(
            gamma_C, eta_C, loc, tf.sqrt(sigma_sq), dtype(neg_inf), n_C),
        y_T=lambda gamma_C, gamma_T, eta_C, eta_T, p, loc, sigma_sq: mix_T(
            gamma_C, gamma_T, eta_C, eta_T, p, loc, tf.sqrt(sigma_sq),
            dtype(neg_inf), n_T)))
def __call__(self, shape: Tuple[int, ...],
             dtype: Optional[tf.DType] = None) -> tf.Tensor:
    """Compute initializations along the given axis.

    Args:
        shape: Shape of Tensor to initialize.
        dtype: DType of Tensor to initialize.

    Returns:
        Initial value.
    """
    axis = (len(shape) + self.axis) % len(shape)
    alpha_as_tensor = tf.convert_to_tensor(self.alpha)
    alpha = (tf.tile([alpha_as_tensor], [shape[axis]])
             if tf.size(alpha_as_tensor) == 1 else alpha_as_tensor)
    dirichlet_sample = distributions.Dirichlet(concentration=alpha).sample(
        [dim for i, dim in enumerate(shape) if i != axis])
    perm = [
        i if i < axis else len(shape) - 1 if i == axis else i - 1
        for i in range(len(shape))
    ]
    return tf.cast(tf.transpose(dirichlet_sample, perm), dtype)
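# Illustrative check of the initializer logic above (a sketch, with the
# permutation hard-coded for this 2-D case): sample a Dirichlet of size
# shape[axis] for every other index, then move that axis back into place.
# The result sums to one along `axis`.
distributions = tfp.distributions  # alias used by the snippet above

shape, axis = (3, 4), 0
alpha = tf.fill([shape[axis]], 0.5)
d_sample = distributions.Dirichlet(concentration=alpha).sample(
    [dim for i, dim in enumerate(shape) if i != axis])  # shape (4, 3)
val = tf.transpose(d_sample, [1, 0])                    # back to (3, 4)
print(tf.reduce_sum(val, axis=axis))                    # ~[1., 1., 1., 1.]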
def prior() -> tfd.Dirichlet:
    """
    Create Dirichlet instance for prior distribution.

    :return: Dirichlet instance
    """
    # `concentration` is captured from the enclosing scope in the original
    # source.
    return tfd.Dirichlet(concentration=concentration, name="topics_prior")
def losses(self):
    """Sum of KL divergences between posteriors and priors"""
    w_prior = tfd.Dirichlet(tf.ones([self.K]))
    theta_prior = tfd.Beta([0.1, 3, 9.9], [9.9, 3, 0.1])
    return (tf.reduce_sum(tfd.kl_divergence(self.weight, w_prior)) +
            tf.reduce_sum(tfd.kl_divergence(self.ASR, theta_prior)))
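# Quick sanity check that TFP provides the closed-form Dirichlet KL used
# above:
q = tfd.Dirichlet(tf.constant([2.0, 2.0, 2.0]))
p = tfd.Dirichlet(tf.ones(3))
print(tfd.kl_divergence(q, p))  # analytic KL(q || p), a non-negative scalar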
def get_dirichlet(self, Input):
    with tf.variable_scope(self.name):
        alphas = self.transformation.transform(Input)
        # softplus plus a small epsilon keeps concentrations strictly positive
        alphas = tf.nn.softplus(alphas) + 1e-6
        dirichlet = tfd.Dirichlet(alphas,
                                  validate_args=True,
                                  allow_nan_stats=False)
        return dirichlet
def __call__(self):
    """Get the distribution object from the backend"""
    if get_backend() == 'pytorch':
        import torch.distributions as tod
        return tod.dirichlet.Dirichlet(self.concentration)
    else:
        from tensorflow_probability import distributions as tfd
        return tfd.Dirichlet(self.concentration)
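# Hedged usage sketch: `__call__` above appears to live on a wrapper class
# exposing a `concentration` attribute, with a module-level `get_backend()`
# choosing the framework; both names here are stand-ins. Exercising the
# TensorFlow branch by calling the function directly:
get_backend = lambda: 'tensorflow'  # hypothetical backend switch

class _ConcentrationHolder:
    concentration = tf.ones(4)

print(__call__(_ConcentrationHolder()).sample())  # draw from tfd.Dirichlet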
def encoder(bag_of_words: tf.Tensor) -> tfd.Dirichlet:
    """
    Map bag-of-words to a Dirichlet instance.

    :param bag_of_words: batch of bag-of-words count vectors
    :return: Dirichlet posterior with clipped concentration parameters
    """
    net = clip_dirichlet_parameters(encoder_net(bag_of_words))
    return tfd.Dirichlet(concentration=net, name="topics_posterior")
def gmm(nb_clusters, hyperparams, batch_size):
    alpha = np.full(shape=nb_clusters, fill_value=hyperparams['alpha'],
                    dtype='float32')
    # assignment probabilities
    theta = tfd.Dirichlet(concentration=alpha).sample()
    # centroids
    mu = tfd.Normal(loc=hyperparams['mu0'],
                    scale=hyperparams['sigma0']).sample(nb_clusters)
    # assignment indicators
    z = tfd.OneHotCategorical(probs=theta).sample(batch_size)
    assignments = tf.argmax(z, axis=1)
    means = tf.gather(mu, assignments)  # map each point to its cluster mean
    # `dims` must be 1-D, so wrap batch_size in a list
    stds = tf.fill(dims=[batch_size], value=hyperparams['tau2'])
    x = tfd.Normal(loc=means, scale=stds).sample()
    return mu.numpy(), theta.numpy(), assignments.numpy(), x.numpy()
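# Example invocation with hypothetical hyperparameters (eager TF2):
hyper = {'alpha': 1.0, 'mu0': 0.0, 'sigma0': 3.0, 'tau2': 0.5}
mu, theta, assignments, x = gmm(nb_clusters=3, hyperparams=hyper,
                                batch_size=100)
print(theta.sum())        # mixture weights sum to ~1
print(mu.shape, x.shape)  # (3,) centroids, (100,) observations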
def set_prior(self, mu_prior=None, sigma_prior=None, theta_prior=None):
    """Set prior distributions"""
    # Prior distributions for the means
    if mu_prior is None:
        self.mu_prior = tfd.Normal(tf.zeros((self.Nc, self.Nd)),
                                   tf.ones((self.Nc, self.Nd)))
    else:
        self.mu_prior = mu_prior

    # Prior distributions for the standard deviations
    if sigma_prior is None:
        self.sigma_prior = tfd.Gamma(2 * tf.ones((self.Nc, self.Nd)),
                                     2 * tf.ones((self.Nc, self.Nd)))
    else:
        self.sigma_prior = sigma_prior

    # Prior distributions for the component weights
    if theta_prior is None:
        self.theta_prior = tfd.Dirichlet(5 * tf.ones((self.Nc,)))
    else:
        self.theta_prior = theta_prior
def sample_posteriors_global(self, nsamples=1000):
    posterior_pi = tfd.Dirichlet(self.alpha)
    pis = posterior_pi.sample(nsamples)

    W1 = np.tile(self.W, [nsamples, 1, 1, 1])  # nsamples x K x N=2 x N=2
    posterior_lambda = tfd.WishartTriL(df=self.dof,
                                       scale_tril=tf.linalg.cholesky(W1))
    lambdas = posterior_lambda.sample()  # nsamples x K x N=2 x N=2

    def get_posterior_mu(beta, mu, lambdas):
        locations = np.broadcast_to(mu, lambdas.shape[0:1] + mu.shape)
        precisions = lambdas * beta[None, :, None, None]
        covs = tf.linalg.inv(precisions)
        # numerical-stability workaround: symmetrize before Cholesky
        covs = 0.5 * (covs + tf.transpose(covs, [0, 1, 3, 2]))
        d = tfd.MultivariateNormalTriL(loc=locations,
                                       scale_tril=tf.linalg.cholesky(covs))
        return d

    posterior_mu = get_posterior_mu(self.beta, self.mu, lambdas)
    mus = posterior_mu.sample()
    return posterior_pi, posterior_lambda, posterior_mu, pis, lambdas, mus
def _init_distribution(conditions, **kwargs):
    concentration = conditions["concentration"]
    return tfd.Dirichlet(concentration=concentration, **kwargs)
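# Trivial exercise of the adapter above, passing extra kwargs through to the
# distribution constructor:
d = _init_distribution({"concentration": tf.ones(3)}, name="dirichlet_prior")
print(d.sample())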
def estimate_transcript_vae_mixture(
        init_feed_dict, num_samples, n, vars, x0_log,
        num_mix_components, num_pca_components):
    # TF1-style code; HalfCauchy, train, and
    # rnaseq_approx_likelihood_from_vars are helpers from the surrounding
    # project.
    log_prior = 0.0

    # z_mix
    # -----
    z_mix_prior = tfd.Dirichlet(
        concentration=tf.constant(5.0, shape=[num_mix_components]),
        name="z_mix_prior")

    z_mix = tf.Variable(
        tf.zeros([num_mix_components]),
        dtype=tf.float32,
        trainable=False,
        name="z_mix")

    log_prior += tf.reduce_sum(z_mix_prior.log_prob(tf.nn.softmax(z_mix)))

    # z_comp_loc
    # ----------
    z_comp_loc_prior = tfd.Normal(
        loc=tf.constant(0.0, dtype=tf.float32),
        scale=tf.constant(5.0, dtype=tf.float32),
        name="z_comp_loc_prior")

    z_comp_loc = tf.Variable(
        # tf.zeros([num_mix_components, num_pca_components]),
        tf.random_normal([num_mix_components, num_pca_components], stddev=0.1),
        name="z_comp_loc")

    log_prior += tf.reduce_sum(z_comp_loc_prior.log_prob(z_comp_loc))

    # z_comp_scale
    # ------------
    z_comp_scale_prior = HalfCauchy(
        loc=0.0,
        scale=0.01,
        name="z_comp_scale_prior")

    # TODO: allowing this to be trainable completely breaks the whole thing.
    # Maybe I just need to tinker with the prior. Not sure.
    z_comp_scale = tf.clip_by_value(tf.nn.softplus(tf.Variable(
        # tf.fill([num_mix_components, num_pca_components], -4.0),
        tf.fill([num_mix_components, num_pca_components], 1.0),
        # trainable=False,
        name="z_comp_scale")), 0.01, 100.0)

    z_comp_scale = tf.Print(z_comp_scale,
                            [tf.reduce_min(z_comp_scale),
                             tf.reduce_max(z_comp_scale)],
                            "z_comp_scale span")

    # log_prior += tf.reduce_sum(z_comp_scale_prior.log_prob(z_comp_scale))

    # low dimensional representation
    z_comp_dist = tfd.Normal(
        loc=z_comp_loc,
        scale=z_comp_scale,
        name="z_comp_dist")

    z = tf.Variable(
        tf.random_normal([num_samples, num_pca_components], stddev=0.1),
        name="z")

    z_comp_log_prob = z_comp_dist.log_prob(tf.expand_dims(z, 1))
    # mixture weights enter the log-density in log space
    z_comp_log_prob += tf.expand_dims(
        tf.expand_dims(tf.nn.log_softmax(z_mix), 0), -1)
    z_log_prob = tf.reduce_logsumexp(z_comp_log_prob, 1)
    log_prior += tf.reduce_sum(z_log_prob)

    # x_loc
    # -----
    hidden1 = tf.layers.dense(z, 64, activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, 64, activation=tf.nn.relu)
    x_loc = tf.layers.dense(hidden2, n, activation=tf.nn.relu)

    # TODO: x error

    # x_scale
    # -------
    x_scale_prior = HalfCauchy(
        loc=0.0,
        scale=0.01,
        name="x_scale_prior")

    x_scale = tf.nn.softplus(tf.Variable(
        tf.fill([n], -4.0),
        name="x_scale"))

    x_scale = tf.Print(x_scale,
                       [tf.reduce_min(x_scale), tf.reduce_max(x_scale)],
                       "x_scale span")

    # log_prior += tf.reduce_sum(x_scale_prior.log_prob(x_scale))

    # x
    # -
    x_prior = tfd.Normal(loc=x_loc, scale=x_scale)

    x = tf.Variable(x0_log, name="x")
    log_prior += tf.reduce_sum(x_prior.log_prob(x))

    # likelihood, training
    log_likelihood = rnaseq_approx_likelihood_from_vars(vars, x)
    log_posterior = log_prior + log_likelihood

    sess = tf.Session()
    train(sess, -log_posterior, init_feed_dict, 500, 5e-2)

    print(sess.run(z_comp_log_prob))

    z_comp_log_prob = tf.reduce_sum(z_comp_log_prob, 2)
    component_probs = sess.run(tf.exp(
        z_comp_log_prob -
        tf.reduce_logsumexp(z_comp_log_prob, 1, keepdims=True)))

    print(sess.run(z))
    print(sess.run(z_comp_loc))

    return component_probs
@property
def weight(self):
    """Variational posterior for the weight"""
    return tfd.Dirichlet(tf.math.exp(self.w_size))
def _init_distribution(conditions):
    a = conditions["a"]
    return tfd.Dirichlet(concentration=a)
# NOTE: the function header below is inferred from the test call that
# follows.
def mix(gamma, eta, loc, scale, neg_inf):
    _gamma = gamma[..., tf.newaxis]
    # FIXME: Possible to use tfd.Blockwise?
    return tfd.Mixture(
        cat=tfd.Categorical(probs=tf.concat([_gamma, 1 - _gamma], axis=-1)),
        components=[
            tfd.Deterministic(np.float64(neg_inf)),
            tfd.MixtureSameFamily(
                mixture_distribution=tfd.Categorical(probs=eta),
                components_distribution=tfd.Normal(loc=loc, scale=scale)),
        ])

# TEST:
dtype = np.float64
K = 5
gamma = tfd.Beta(dtype(1), dtype(1)).sample()
eta = tfd.Dirichlet(tf.ones(K, dtype=dtype) / K).sample()
m = mix(gamma, eta, tf.zeros(K, dtype=dtype), tf.ones(K, dtype=dtype),
        dtype(-10))
s = m.sample(3)
m.log_prob(s)

# NOTE:
# - `Sample` and `Independent` resemble, respectively, `filldist` and
#   `arraydist` in Turing.

def create_model(n_C, n_T, K, neg_inf=-10, dtype=np.float64):
    return tfd.JointDistributionNamed(dict(
        p=tfd.Beta(dtype(1), dtype(1)),
        gamma_C=tfd.Beta(dtype(1), dtype(1)),
        gamma_T=tfd.Beta(dtype(1), dtype(1)),
        eta_C=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
        eta_T=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
        # remaining fields restored from the fuller `create_model` variant
        # earlier in this file, adapted to this file's 5-argument `mix`
        loc=tfd.Sample(tfd.Normal(dtype(0), dtype(1)), sample_shape=K),
        sigma_sq=tfd.Sample(tfd.InverseGamma(dtype(3), dtype(2)),
                            sample_shape=K),
        y_C=lambda gamma_C, eta_C, loc, sigma_sq: mix(
            gamma_C, eta_C, loc, tf.sqrt(sigma_sq), dtype(neg_inf)),
        y_T=lambda gamma_C, gamma_T, eta_C, eta_T, p, loc, sigma_sq: mix_T(
            gamma_C, gamma_T, eta_C, eta_T, p, loc, tf.sqrt(sigma_sq),
            dtype(neg_inf))))
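# Small check of the NOTE above: `Sample` replicates one distribution i.i.d.
# (like Turing's `filldist`), while `Independent` bundles an explicit batch
# (like `arraydist`); with matching parameters they assign the same
# log-density.
K_demo = 4
d_sample = tfd.Sample(tfd.Normal(0., 1.), sample_shape=K_demo)
d_indep = tfd.Independent(tfd.Normal(tf.zeros(K_demo), tf.ones(K_demo)),
                          reinterpreted_batch_ndims=1)
x = d_sample.sample(seed=3)
print(d_sample.log_prob(x), d_indep.log_prob(x))  # equal scalars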
def _base_dist(self, a: TensorLike, *args, **kwargs):
    # TensorLike is a type alias from the original project
    return tfd.Dirichlet(*args, concentration=a, **kwargs)
@property
def theta(self):
    """Variational posterior for the component size"""
    return tfd.Dirichlet(tf.math.exp(self.counts))
def E_log_p_pi_samples(self, pis):
    prior_pi = tfd.Dirichlet(self.alpha0)
    log_prior_pis = prior_pi.log_prob(pis)
    return tf.reduce_mean(log_prior_pis)
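# Standalone sketch of the Monte Carlo estimate above: average the prior
# log-density over (here, synthetic) posterior samples of the mixture
# weights `pi`.
alpha0 = tf.ones(3)
pi_samples = tfd.Dirichlet(tf.constant([4.0, 2.0, 1.0])).sample(1000, seed=4)
print(tf.reduce_mean(tfd.Dirichlet(alpha0).log_prob(pi_samples)))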