def entropy(self, alpha):
  """Entropy of probability distribution.

  Parameters
  ----------
  alpha : tf.Tensor
    An n-D tensor with each :math:`\\alpha` constrained to
    :math:`\\alpha_i > 0`.

  Returns
  -------
  tf.Tensor
    A tensor of one dimension less than the input.
  """
  alpha = tf.cast(alpha, dtype=tf.float32)
  multivariate_idx = len(get_dims(alpha)) - 1
  K = get_dims(alpha)[multivariate_idx]
  if multivariate_idx == 0:
    a = tf.reduce_sum(alpha)
    return tf.lbeta(alpha) + \
        (a - K) * tf.digamma(a) - \
        tf.reduce_sum((alpha - 1.0) * tf.digamma(alpha))
  else:
    a = tf.reduce_sum(alpha, multivariate_idx)
    return tf.lbeta(alpha) + \
        (a - K) * tf.digamma(a) - \
        tf.reduce_sum((alpha - 1.0) * tf.digamma(alpha), multivariate_idx)

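# The entropy() above evaluates the closed-form Dirichlet entropy
#   H(Dirichlet(alpha)) = log B(alpha) + (a - K) psi(a)
#                         - sum_i (alpha_i - 1) psi(alpha_i),  a = sum_i alpha_i,
# where tf.lbeta gives log B(alpha) and psi is the digamma function. A quick
# NumPy/SciPy cross-check of that formula (a standalone sketch; SciPy is an
# extra assumption, not a dependency of the snippets above):
import numpy as np
from scipy.special import digamma, gammaln
from scipy.stats import dirichlet as sp_dirichlet

def dirichlet_entropy_np(alpha):
  """Closed-form Dirichlet entropy evaluated with NumPy/SciPy primitives."""
  alpha = np.asarray(alpha, dtype=np.float64)
  a, K = alpha.sum(), alpha.size
  log_B = gammaln(alpha).sum() - gammaln(a)
  return log_B + (a - K) * digamma(a) - ((alpha - 1.0) * digamma(alpha)).sum()

# Agrees with SciPy's own implementation.
print(np.allclose(dirichlet_entropy_np([1.0, 2.0, 3.0]),
                  sp_dirichlet.entropy([1.0, 2.0, 3.0])))
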
def entropy(self, n, p):
  """Entropy of the multinomial distribution, -sum_x p(x) log p(x)."""
  # Note that given n and p where p is a probability vector of
  # length k, the entropy requires a sum over all
  # possible configurations of a k-vector which sums to n. It's
  # expensive.
  # http://stackoverflow.com/questions/36435754/generating-a-numpy-array-with-all-combinations-of-numbers-that-sum-to-less-than
  sess = tf.Session()
  n = sess.run(tf.cast(tf.squeeze(n), dtype=tf.int32))
  sess.close()
  p = tf.cast(tf.squeeze(p), dtype=tf.float32)
  if isinstance(n, np.int32):
    k = get_dims(p)[0]
    max_range = np.zeros(k, dtype=np.int32) + n
    x = np.array([i for i in product(*(range(i + 1) for i in max_range))
                  if sum(i) == n])
    logpmf = self.logpmf(x, n, p)
    # Entropy is the negative expectation of the log-probability.
    return -tf.reduce_sum(tf.exp(logpmf) * logpmf)
  else:
    out = []
    for j in range(n.shape[0]):
      k = get_dims(p)[0]
      max_range = np.zeros(k, dtype=np.int32) + n[j]
      x = np.array([i for i in product(*(range(i + 1) for i in max_range))
                    if sum(i) == n[j]])
      logpmf = self.logpmf(x, n[j], p[j, :])
      out += [-tf.reduce_sum(tf.exp(logpmf) * logpmf)]

    return tf.pack(out)

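# The "sum over all configurations" referenced above enumerates every length-k
# vector of nonnegative counts summing to n. A minimal standalone sketch of
# that enumeration (illustration only; the method above builds it inline):
import numpy as np
from itertools import product

def count_vectors(k, n):
  """All length-k nonnegative integer vectors whose entries sum to n."""
  return np.array([v for v in product(*(range(n + 1) for _ in range(k)))
                   if sum(v) == n])

# Example: the 6 ways to place n = 2 outcomes into k = 3 buckets.
print(count_vectors(3, 2))
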
def log_prob(self, xs, zs):
  """Return a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])]."""
  x = xs['x']
  pi, mus, sigmas = zs
  log_prior = dirichlet.logpdf(pi, self.alpha)
  log_prior += tf.reduce_sum(norm.logpdf(mus, 0, np.sqrt(self.c)), 1)
  log_prior += tf.reduce_sum(invgamma.logpdf(sigmas, self.a, self.b), 1)

  # Loop over each sample zs[s, :].
  log_lik = []
  N = get_dims(x)[0]
  n_samples = get_dims(pi)[0]
  for s in range(n_samples):
    # log-likelihood is
    # sum_{n=1}^N log sum_{k=1}^K exp( log pi_k + log N(x_n; mu_k, sigma_k) )
    # Create a K x N matrix, whose entry (k, n) is
    # log pi_k + log N(x_n; mu_k, sigma_k).
    matrix = []
    for k in range(self.K):
      matrix += [tf.ones(N) * tf.log(pi[s, k]) +
                 multivariate_normal.logpdf(
                     x, mus[s, (k * self.D):((k + 1) * self.D)],
                     sigmas[s, (k * self.D):((k + 1) * self.D)])]

    matrix = tf.pack(matrix)
    # log_sum_exp() along the rows is a vector, whose nth
    # element is the log-likelihood of data point x_n.
    vector = log_sum_exp(matrix, 0)
    # Sum over data points to get the full log-likelihood.
    log_lik_z = tf.reduce_sum(vector)
    log_lik += [log_lik_z]

  return log_prior + tf.pack(log_lik)

def entropy(self, n, p):
  # Note that given n and p where p is a probability vector of
  # length k, the entropy requires a sum over all
  # possible configurations of a k-vector which sums to n. It's
  # expensive.
  # http://stackoverflow.com/questions/36435754/generating-a-numpy-array-with-all-combinations-of-numbers-that-sum-to-less-than
  sess = tf.Session()
  n = sess.run(tf.cast(tf.squeeze(n), dtype=tf.int32))
  sess.close()
  p = tf.cast(tf.squeeze(p), dtype=tf.float32)
  if isinstance(n, np.int32):
    k = get_dims(p)[0]
    max_range = np.zeros(k, dtype=np.int32) + n
    x = np.array([i for i in product(*(range(i + 1) for i in max_range))
                  if sum(i) == n])
    logpmf = self.logpmf(x, n, p)
    # Entropy is -sum_x p(x) log p(x), hence the negation.
    return -tf.reduce_sum(tf.mul(tf.exp(logpmf), logpmf))
  else:
    out = []
    for j in range(n.shape[0]):
      k = get_dims(p)[0]
      max_range = np.zeros(k, dtype=np.int32) + n[j]
      x = np.array([i for i in product(*(range(i + 1) for i in max_range))
                    if sum(i) == n[j]])
      logpmf = self.logpmf(x, n[j], p[j, :])
      out += [-tf.reduce_sum(tf.mul(tf.exp(logpmf), logpmf))]

    return tf.pack(out)

def log_prob(self, xs, zs):
  """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])]."""
  x = xs['x']
  pi, mus, sigmas = self.unpack_params(zs)
  log_prior = dirichlet.logpdf(pi, self.alpha)
  log_prior += tf.reduce_sum(norm.logpdf(mus, 0, np.sqrt(self.c)))
  log_prior += tf.reduce_sum(invgamma.logpdf(sigmas, self.a, self.b))

  # Loop over each sample zs[s, :].
  log_lik = []
  N = get_dims(x)[0]
  n_samples = get_dims(pi)[0]
  for s in range(n_samples):
    # log-likelihood is
    # sum_{n=1}^N log sum_{k=1}^K exp( log pi_k + log N(x_n; mu_k, sigma_k) )
    # Create a K x N matrix, whose entry (k, n) is
    # log pi_k + log N(x_n; mu_k, sigma_k).
    matrix = []
    for k in range(self.K):
      matrix += [tf.ones(N) * tf.log(pi[s, k]) +
                 multivariate_normal.logpdf(
                     x, mus[s, (k * self.D):((k + 1) * self.D)],
                     sigmas[s, (k * self.D):((k + 1) * self.D)])]

    matrix = tf.pack(matrix)
    # log_sum_exp() along the rows is a vector, whose nth
    # element is the log-likelihood of data point x_n.
    vector = log_sum_exp(matrix, 0)
    # Sum over data points to get the full log-likelihood.
    log_lik_z = tf.reduce_sum(vector)
    log_lik += [log_lik_z]

  return log_prior + tf.pack(log_lik)

def _test(distribution, bijector, n):
  x = TransformedDistribution(distribution=distribution,
                              bijector=bijector,
                              validate_args=True)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(distribution.mean())
  assert val_est == val_true

def _test(base_dist_cls, lower_cutoff, upper_cutoff, n, **base_dist_args):
  x = QuantizedDistribution(base_dist_cls=base_dist_cls,
                            lower_cutoff=lower_cutoff,
                            upper_cutoff=upper_cutoff,
                            **base_dist_args)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(lower_cutoff)
  assert val_est == val_true

def _test(mu, diag_large, v, diag_small, n):
  x = MultivariateNormalDiagPlusVDVT(mu=mu, diag_large=diag_large,
                                     v=v, diag_small=diag_small)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(mu)
  assert val_est == val_true

def _test(base_dist_cls, lower_cutoff, upper_cutoff, n, **base_dist_args):
  x = QuantizedDistribution(base_dist_cls=base_dist_cls,
                            lower_cutoff=lower_cutoff,
                            upper_cutoff=upper_cutoff,
                            **base_dist_args)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(lower_cutoff)
  assert val_est == val_true

def _test(distribution, lower_cutoff, upper_cutoff, n):
  x = QuantizedDistribution(distribution=distribution,
                            lower_cutoff=lower_cutoff,
                            upper_cutoff=upper_cutoff,
                            validate_args=True)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(lower_cutoff)
  assert val_est == val_true

def _test(base_dist_cls, transform, inverse, log_det_jacobian, n,
          **base_dist_args):
  x = TransformedDistribution(base_dist_cls=base_dist_cls,
                              transform=transform,
                              inverse=inverse,
                              log_det_jacobian=log_det_jacobian,
                              **base_dist_args)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(base_dist_args['mu'])
  assert val_est == val_true

def _test(base_dist_cls, transform, inverse, log_det_jacobian, n,
          **base_dist_args):
  x = TransformedDistribution(base_dist_cls=base_dist_cls,
                              transform=transform,
                              inverse=inverse,
                              log_det_jacobian=log_det_jacobian,
                              **base_dist_args)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(base_dist_args["mu"])
  assert val_est == val_true

def log_prob(self, xs, zs):
  """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])]."""
  N = get_dims(xs)[0]
  # Loop over each mini-batch zs[b,:]
  log_prob = []
  for z in tf.unpack(zs):
    # Do the unconstrained to constrained transformation for MAP here.
    pi, mus, sigmas = self.unpack_params(z)
    pi = tf.sigmoid(pi)
    pi = tf.concat(0, [pi[0:(self.K - 1)],
                       tf.expand_dims(1.0 - tf.reduce_sum(pi[0:(self.K - 1)]), 0)])
    sigmas = tf.nn.softplus(sigmas)
    log_prior = dirichlet.logpdf(pi, self.alpha)
    for k in xrange(self.K):
      log_prior += norm.logpdf(mus[k * self.D], 0, np.sqrt(self.c))
      log_prior += norm.logpdf(mus[k * self.D + 1], 0, np.sqrt(self.c))
      log_prior += invgamma.logpdf(sigmas[k * self.D], self.a, self.b)
      log_prior += invgamma.logpdf(sigmas[k * self.D + 1], self.a, self.b)

    log_lik = tf.constant(0.0, dtype=tf.float32)
    for x in tf.unpack(xs):
      for k in xrange(self.K):
        log_lik += tf.log(pi[k])
        log_lik += multivariate_normal.logpdf(
            x, mus[(k * self.D):((k + 1) * self.D)],
            sigmas[(k * self.D):((k + 1) * self.D)])

    log_prob += [log_prior + log_lik]

  return tf.pack(log_prob)

def log_prob(self, xs, zs):
  """Return scalar, the log joint density log p(xs, zs)."""
  x = xs['x']
  pi, mus, sigmas = zs['pi'], zs['mu'], zs['sigma']
  log_prior = dirichlet.logpdf(pi, self.alpha)
  log_prior += tf.reduce_sum(norm.logpdf(mus, 0.0, self.c))
  log_prior += tf.reduce_sum(invgamma.logpdf(sigmas, self.a, self.b))

  # log-likelihood is
  # sum_{n=1}^N log sum_{k=1}^K exp( log pi_k + log N(x_n; mu_k, sigma_k) )
  # Create a K x N matrix, whose entry (k, n) is
  # log pi_k + log N(x_n; mu_k, sigma_k).
  N = get_dims(x)[0]
  matrix = []
  for k in range(self.K):
    matrix += [tf.ones(N) * tf.log(pi[k]) +
               multivariate_normal_diag.logpdf(
                   x, mus[(k * self.D):((k + 1) * self.D)],
                   sigmas[(k * self.D):((k + 1) * self.D)])]

  matrix = tf.pack(matrix)
  # log_sum_exp() along the rows is a vector, whose nth
  # element is the log-likelihood of data point x_n.
  vector = log_sum_exp(matrix, 0)
  # Sum over data points to get the full log-likelihood.
  log_lik = tf.reduce_sum(vector)
  return log_prior + log_lik

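# The log-likelihood assembled above is
#   sum_{n=1}^N log sum_{k=1}^K exp(log pi_k + log N(x_n; mu_k, sigma_k)),
# and log_sum_exp() is what keeps the inner sum numerically stable. A minimal
# NumPy version of that reduction (a sketch, not the helper these snippets
# import):
import numpy as np

def log_sum_exp_np(x, axis=0):
  """Stable log(sum(exp(x))) along `axis`: subtract the max before exponentiating."""
  x_max = np.max(x, axis=axis, keepdims=True)
  return np.squeeze(x_max, axis=axis) + np.log(np.sum(np.exp(x - x_max), axis=axis))
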
def entropy(self, mean=None, cov=1):
  """Entropy of probability distribution.

  This is not vectorized with respect to any arguments. Note the
  entropy does not depend on the mean.

  Parameters
  ----------
  mean : tf.Tensor, optional
    A 1-D tensor. Defaults to zero mean.
  cov : tf.Tensor, optional
    A 1-D or 2-D tensor. Defaults to identity matrix.

  Returns
  -------
  tf.Tensor
    A scalar tensor.
  """
  if cov is 1:
    d = 1
    det_cov = 1.0
  else:
    cov = tf.cast(cov, dtype=tf.float32)
    d = get_dims(cov)[0]
    if len(cov.get_shape()) == 1:
      det_cov = tf.reduce_prod(cov)
    else:
      det_cov = tf.matrix_determinant(cov)

  return 0.5 * (d + d * tf.log(2 * np.pi) + tf.log(det_cov))

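# The value returned above is the standard multivariate normal entropy,
#   H(N(mu, Sigma)) = 0.5 * (d + d * log(2 * pi) + log det Sigma)
#                   = 0.5 * log((2 * pi * e)^d * det Sigma),
# which is why the mean argument is accepted but never used.
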
def log_prob(self, xs, zs):
  """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])]."""
  N = get_dims(xs)[0]
  # Loop over each mini-batch zs[b,:]
  log_prob = []
  for z in tf.unpack(zs):
    # Do the unconstrained to constrained transformation for MAP here.
    pi, mus, sigmas = self.unpack_params(z)
    pi = tf.sigmoid(pi)
    pi = tf.concat(0, [pi[0:(self.K - 1)],
                       tf.expand_dims(1.0 - tf.reduce_sum(pi[0:(self.K - 1)]), 0)])
    sigmas = tf.nn.softplus(sigmas)
    log_prior = dirichlet.logpdf(pi, self.alpha)
    for k in xrange(self.K):
      log_prior += norm.logpdf(mus[k * self.D], 0, np.sqrt(self.c))
      log_prior += norm.logpdf(mus[k * self.D + 1], 0, np.sqrt(self.c))
      log_prior += invgamma.logpdf(sigmas[k * self.D], self.a, self.b)
      log_prior += invgamma.logpdf(sigmas[k * self.D + 1], self.a, self.b)

    log_lik = tf.constant(0.0, dtype=tf.float32)
    for x in tf.unpack(xs):
      for k in xrange(self.K):
        log_lik += tf.log(pi[k])
        log_lik += multivariate_normal.logpdf(
            x, mus[(k * self.D):((k + 1) * self.D)],
            sigmas[(k * self.D):((k + 1) * self.D)])

    log_prob += [log_prior + log_lik]

  return tf.pack(log_prob)

def logpmf(self, x, n, p):
  """Log of the probability mass function.

  Parameters
  ----------
  x : tf.Tensor
    An n-D tensor for n > 1, where the inner (right-most) dimension
    represents the multivariate dimension. Each element is the
    number of outcomes in a bucket and not a one-hot.
  n : tf.Tensor
    A tensor of one less dimension than ``x``, representing the
    number of outcomes, equal to sum x[i] along the inner
    (right-most) dimension.
  p : tf.Tensor
    A tensor of one less dimension than ``x``, representing
    probabilities which sum to 1.

  Returns
  -------
  tf.Tensor
    A tensor of one dimension less than the input.
  """
  x = tf.cast(x, dtype=tf.float32)
  n = tf.cast(n, dtype=tf.float32)
  p = tf.cast(p, dtype=tf.float32)
  multivariate_idx = len(get_dims(x)) - 1
  if multivariate_idx == 0:
    return tf.lgamma(n + 1.0) - \
        tf.reduce_sum(tf.lgamma(x + 1.0)) + \
        tf.reduce_sum(x * tf.log(p))
  else:
    return tf.lgamma(n + 1.0) - \
        tf.reduce_sum(tf.lgamma(x + 1.0), multivariate_idx) + \
        tf.reduce_sum(x * tf.log(p), multivariate_idx)

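# Written out, the mass function evaluated above is
#   log p(x | n, p) = log Gamma(n + 1) - sum_i log Gamma(x_i + 1)
#                     + sum_i x_i * log p_i,
# i.e. the log multinomial coefficient plus the log-probability of the counts.
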
def log_prob(self, xs):
  """
  Parameters
  ----------
  xs : list or tf.Tensor or np.array
    If more than one layer, a list of tf.Tensors or np.array's
    of dimension (batch x shape). If one layer, a tf.Tensor or
    np.array of (batch x shape).

  Notes
  -----
  This method may be removed in the future in favor of indexable
  log_prob methods, e.g., for automatic Rao-Blackwellization.

  This method assumes each xs[l] in xs has the same batch size,
  i.e., dimensions (batch x shape) for fixed batch and varying
  shape.

  This method assumes length of xs == length of self.layers.
  """
  if len(self.layers) == 1:
    return self.layers[0].log_prob(xs)

  n_minibatch = get_dims(xs[0])[0]
  log_prob = tf.zeros([n_minibatch], dtype=tf.float32)
  for l, layer in enumerate(self.layers):
    log_prob += layer.log_prob(xs[l])

  return log_prob

def logpdf(self, x, alpha):
  """Log of the probability density function.

  Parameters
  ----------
  x : tf.Tensor
    An n-D tensor for n > 1, where the inner (right-most) dimension
    represents the multivariate dimension.
  alpha : tf.Tensor
    A tensor of same shape as ``x``, and with each :math:`\\alpha`
    constrained to :math:`\\alpha_i > 0`.

  Returns
  -------
  tf.Tensor
    A tensor of one dimension less than the input.
  """
  x = tf.cast(x, dtype=tf.float32)
  alpha = tf.cast(alpha, dtype=tf.float32)
  multivariate_idx = len(get_dims(x)) - 1
  if multivariate_idx == 0:
    return -tf.lbeta(alpha) + tf.reduce_sum((alpha - 1.0) * tf.log(x))
  else:
    return -tf.lbeta(alpha) + \
        tf.reduce_sum((alpha - 1.0) * tf.log(x), multivariate_idx)

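# The density evaluated above, for x on the probability simplex, is
#   log p(x | alpha) = -log B(alpha) + sum_i (alpha_i - 1) * log x_i,
# with B(alpha) the multivariate beta function (tf.lbeta returns its log).
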
def log_prob(self, xs, zs):
  """Return scalar, the log joint density log p(xs, zs)."""
  x = xs["x"]
  pi, mus, sigmas = zs["pi"], zs["mu"], zs["sigma"]
  log_prior = dirichlet.logpdf(pi, self.alpha)
  log_prior += tf.reduce_sum(norm.logpdf(mus, 0.0, self.c))
  log_prior += tf.reduce_sum(invgamma.logpdf(sigmas, self.a, self.b))

  # log-likelihood is
  # sum_{n=1}^N log sum_{k=1}^K exp( log pi_k + log N(x_n; mu_k, sigma_k) )
  # Create a K x N matrix, whose entry (k, n) is
  # log pi_k + log N(x_n; mu_k, sigma_k).
  N = get_dims(x)[0]
  matrix = []
  for k in range(self.K):
    matrix += [tf.ones(N) * tf.log(pi[k]) +
               multivariate_normal_diag.logpdf(
                   x, mus[(k * self.D):((k + 1) * self.D)],
                   sigmas[(k * self.D):((k + 1) * self.D)])]

  matrix = tf.pack(matrix)
  # log_sum_exp() along the rows is a vector, whose nth
  # element is the log-likelihood of data point x_n.
  vector = log_sum_exp(matrix, 0)
  # Sum over data points to get the full log-likelihood.
  log_lik = tf.reduce_sum(vector)
  return log_prior + log_lik

def log_prob(self, xs, zs):
  """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])]."""
  N = get_dims(xs)[0]
  # Loop over each mini-batch zs[b,:]
  log_prob = []
  for z in tf.unpack(zs):
    pi, mus, sigmas = self.unpack_params(z)
    log_prior = dirichlet.logpdf(pi, self.alpha)
    for k in xrange(self.K):
      log_prior += norm.logpdf(mus[k * self.D], 0, np.sqrt(self.c))
      log_prior += norm.logpdf(mus[k * self.D + 1], 0, np.sqrt(self.c))
      log_prior += invgamma.logpdf(sigmas[k * self.D], self.a, self.b)
      log_prior += invgamma.logpdf(sigmas[k * self.D + 1], self.a, self.b)

    log_lik = tf.constant(0.0, dtype=tf.float32)
    for x in tf.unpack(xs):
      for k in xrange(self.K):
        log_lik += tf.log(pi[k])
        log_lik += multivariate_normal.logpdf(
            x, mus[(k * self.D):((k + 1) * self.D)],
            sigmas[(k * self.D):((k + 1) * self.D)])

    log_prob += [log_prior + log_lik]

  return tf.pack(log_prob)

def log_prob(self, xs):
  """
  Parameters
  ----------
  xs : list of tf.Tensor or tf.Tensor
    If more than one layer, a list of tf.Tensors of dimension
    (batch x shape). If one layer, a tf.Tensor of (batch x shape).

  Notes
  -----
  This method may be removed in the future in favor of indexable
  log_prob methods, e.g., for automatic Rao-Blackwellization.

  This method assumes each xs[l] in xs has the same batch size,
  i.e., dimensions (batch x shape) for fixed batch and varying
  shape.

  This method assumes length of xs == length of self.layers.
  """
  if len(self.layers) == 1:
    return self.layers[0].log_prob(xs)

  n_samples = get_dims(xs[0])[0]
  log_prob = tf.zeros([n_samples], dtype=tf.float32)
  for l, layer in enumerate(self.layers):
    log_prob += layer.log_prob(xs[l])

  return log_prob

def log_prob(self, xs, zs):
  """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])]."""
  N = get_dims(xs)[0]
  # Loop over each mini-batch zs[b,:]
  log_prob = []
  for z in tf.unpack(zs):
    pi, mus, sigmas = self.unpack_params(z)
    log_prior = dirichlet.logpdf(pi, self.alpha)
    for k in xrange(self.K):
      log_prior += norm.logpdf(mus[k * self.D], 0, np.sqrt(self.c))
      log_prior += norm.logpdf(mus[k * self.D + 1], 0, np.sqrt(self.c))
      log_prior += invgamma.logpdf(sigmas[k * self.D], self.a, self.b)
      log_prior += invgamma.logpdf(sigmas[k * self.D + 1], self.a, self.b)

    log_lik = tf.constant(0.0, dtype=tf.float32)
    for x in tf.unpack(xs):
      for k in xrange(self.K):
        log_lik += tf.log(pi[k])
        log_lik += multivariate_normal.logpdf(
            x, mus[(k * self.D):((k + 1) * self.D)],
            sigmas[(k * self.D):((k + 1) * self.D)])

    log_prob += [log_prior + log_lik]

  return tf.pack(log_prob)

def np_dict(self, samples):
  """Form dictionary to feed any placeholders with np.array samples.

  Parameters
  ----------
  samples : list or tf.Tensor
    If more than one layer, a list of tf.Tensors of dimension
    (batch x shape). If one layer, a tf.Tensor of (batch x shape).

  Notes
  -----
  This method assumes each samples[l] in samples has the same
  batch size, i.e., dimensions (batch x shape) for fixed batch
  and varying shape.
  """
  if not isinstance(samples, list):
    samples = [samples]

  size = get_dims(samples[0])[0]
  feed_dict = {}
  for sample, layer in zip(samples, self.layers):
    if sample.name.startswith('Placeholder'):
      feed_dict[sample] = layer.sample(size)

  return feed_dict

def entropy(self, mean=None, cov=1):
  """Entropy of probability distribution.

  This is not vectorized with respect to any arguments. Note the
  entropy does not depend on the mean.

  Parameters
  ----------
  mean : tf.Tensor, optional
    A 1-D tensor. Defaults to zero mean.
  cov : tf.Tensor, optional
    A 1-D or 2-D tensor. Defaults to identity matrix.

  Returns
  -------
  tf.Tensor
    A scalar tensor.
  """
  if cov is 1:
    d = 1
    det_cov = 1.0
  else:
    cov = tf.cast(cov, dtype=tf.float32)
    d = get_dims(cov)[0]
    if len(cov.get_shape()) == 1:
      det_cov = tf.reduce_prod(cov)
    else:
      det_cov = tf.matrix_determinant(cov)

  return 0.5 * (d + d * tf.log(2 * np.pi) + tf.log(det_cov))

def logpdf(self, x, alpha):
  """Log of the probability density function.

  Parameters
  ----------
  x : tf.Tensor
    An n-D tensor for n > 1, where the inner (right-most) dimension
    represents the multivariate dimension.
  alpha : tf.Tensor
    A tensor of same shape as ``x``, and with each :math:`\\alpha`
    constrained to :math:`\\alpha_i > 0`.

  Returns
  -------
  tf.Tensor
    A tensor of one dimension less than the input.
  """
  x = tf.cast(x, dtype=tf.float32)
  alpha = tf.cast(alpha, dtype=tf.float32)
  multivariate_idx = len(get_dims(x)) - 1
  if multivariate_idx == 0:
    return -tf.lbeta(alpha) + tf.reduce_sum((alpha - 1.0) * tf.log(x))
  else:
    return -tf.lbeta(alpha) + \
        tf.reduce_sum((alpha - 1.0) * tf.log(x), multivariate_idx)

def __init__(self, params, validate_args=False, allow_nan_stats=True,
             name="Empirical", *args, **kwargs):
  with tf.name_scope(name, values=[params]) as ns:
    with tf.control_dependencies([]):
      self._params = tf.identity(params, name="params")
      try:
        self._n = get_dims(self._params)[0]
      except:  # scalar params
        self._n = 1

      super(Empirical, self).__init__(
          dtype=self._params.dtype,
          parameters={"params": self._params, "n": self._n},
          is_continuous=False,
          is_reparameterized=True,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          name=ns,
          *args, **kwargs)

def entropy(self, mean=None, cov=1):
  """
  Note entropy does not depend on its mean.

  Arguments
  ----------
  mean: tf.Tensor, optional
    vector. Defaults to zero mean.
  cov: tf.Tensor, optional
    vector or matrix. Defaults to identity.

  Returns
  -------
  tf.Tensor
    scalar
  """
  if cov is 1:
    d = 1
    det_cov = 1.0
  else:
    cov = tf.cast(cov, dtype=tf.float32)
    d = get_dims(cov)[0]
    if len(cov.get_shape()) == 1:
      det_cov = tf.reduce_prod(cov)
    else:
      det_cov = tf.matrix_determinant(cov)

  return 0.5 * (d + d * tf.log(2 * np.pi) + tf.log(det_cov))

def logpdf(self, x, mean=None, cov=1):
  """Log of the probability density function.

  Parameters
  ----------
  x : tf.Tensor
    A 1-D or 2-D tensor.
  mean : tf.Tensor, optional
    A 1-D tensor. Defaults to zero mean.
  cov : tf.Tensor, optional
    A 1-D or 2-D tensor. Defaults to identity matrix.

  Returns
  -------
  tf.Tensor
    A tensor of one dimension less than the input.
  """
  x = tf.cast(x, dtype=tf.float32)
  x_shape = get_dims(x)
  if len(x_shape) == 1:
    d = x_shape[0]
  else:
    d = x_shape[1]

  if mean is None:
    r = x
  else:
    mean = tf.cast(mean, dtype=tf.float32)
    r = x - mean

  if cov is 1:
    L_inv = tf.diag(tf.ones([d]))
    det_cov = tf.constant(1.0)
  else:
    cov = tf.cast(cov, dtype=tf.float32)
    if len(cov.get_shape()) == 1:  # vector
      L_inv = tf.diag(1.0 / tf.sqrt(cov))
      det_cov = tf.reduce_prod(cov)
    else:  # matrix
      L = tf.cholesky(cov)
      L_inv = tf.matrix_inverse(L)
      det_cov = tf.pow(tf.reduce_prod(tf.diag_part(L)), 2)

  lps = -0.5 * d * tf.log(2 * np.pi) - 0.5 * tf.log(det_cov)
  if len(x_shape) == 1:  # vector
    r = tf.reshape(r, shape=(d, 1))
    inner = tf.matmul(L_inv, r)
    lps -= 0.5 * tf.matmul(inner, inner, transpose_a=True)
    return tf.squeeze(lps)
  else:  # matrix
    # TODO vectorize further
    out = []
    for r_vec in tf.unpack(r):
      r_vec = tf.reshape(r_vec, shape=(d, 1))
      inner = tf.matmul(L_inv, r_vec)
      out += [tf.squeeze(lps - 0.5 * tf.matmul(inner, inner,
                                               transpose_a=True))]

    return tf.pack(out)

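# The quantity assembled above is the multivariate normal log-density
#   log N(x; mu, Sigma) = -0.5 * d * log(2 * pi) - 0.5 * log det Sigma
#                         - 0.5 * (x - mu)^T Sigma^{-1} (x - mu),
# computed through the Cholesky factor Sigma = L L^T, using
#   det Sigma = (prod_i L_ii)^2   and
#   (x - mu)^T Sigma^{-1} (x - mu) = ||L^{-1} (x - mu)||^2.
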
def log_prob(self, xs):
  """Evaluate log probability.

      ``log p(xs | params)``

  Parameters
  ----------
  xs : tf.Tensor or np.ndarray
    n x self.shape

  Returns
  -------
  tf.Tensor
    A vector for each log density evaluation,

    .. code-block:: none

      [ sum_{idx in shape} log p(xs[1, idx] | params[idx]),
        ...,
        sum_{idx in shape} log p(xs[n, idx] | params[idx]) ]
  """
  # Loop over each random variable.
  # If distribution is univariate, this is over all indices; if
  # distribution is multivariate, this is over all but the last
  # index.
  n = get_dims(xs)[0]
  log_prob = tf.zeros([n], dtype=tf.float32)
  if len(self.shape) == 1:
    if self.is_multivariate:
      idx = ()
      log_prob += self.log_prob_idx(idx, xs)
    else:
      for idx in product(range(self.shape[0])):
        log_prob += self.log_prob_idx(idx, xs)
  elif len(self.shape) == 2:
    if self.is_multivariate:
      for idx in product(range(self.shape[0])):
        log_prob += self.log_prob_idx(idx, xs)
    else:
      for idx in product(range(self.shape[0]), range(self.shape[1])):
        log_prob += self.log_prob_idx(idx, xs)
  elif len(self.shape) == 3:
    if self.is_multivariate:
      for idx in product(range(self.shape[0]), range(self.shape[1])):
        log_prob += self.log_prob_idx(idx, xs)
    else:
      for idx in product(range(self.shape[0]), range(self.shape[1]),
                         range(self.shape[2])):
        log_prob += self.log_prob_idx(idx, xs)
  else:  # len(self.shape) >= 4
    # There should be a generic solution.
    raise NotImplementedError()

  return log_prob

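# The per-rank branching above enumerates index tuples by hand for shapes of
# rank 1 to 3. The same enumeration generalizes to any rank with a single
# itertools.product call; a standalone sketch (the shape/is_multivariate names
# mirror the attributes used above, not the library's own implementation):
from itertools import product

def index_tuples(shape, is_multivariate):
  """Index tuples over all axes (univariate) or all but the last (multivariate)."""
  axes = shape[:-1] if is_multivariate else shape
  return product(*(range(dim) for dim in axes))

# Example: a (2, 3)-shaped block of multivariate factors -> (0,), (1,).
print(list(index_tuples((2, 3), True)))
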
def _test(self, input, multiples):
  if isinstance(multiples, int) or isinstance(multiples, float):
    multiples_shape = [multiples]
  elif isinstance(multiples, tuple):
    multiples_shape = list(multiples)
  else:
    multiples_shape = multiples

  input_shape = get_dims(input)
  diff = len(input_shape) - len(multiples_shape)
  if diff < 0:
    input_shape = [1] * abs(diff) + input_shape
  elif diff > 0:
    multiples_shape = [1] * diff + multiples_shape

  val_true = [x * y for x, y in zip(input_shape, multiples_shape)]
  with self.test_session():
    val_est = get_dims(tile(input, multiples))
    assert val_est == val_true

def log_prob(self, xs, zs):
  """Return scalar, the log joint density log p(xs, zs).

  Given n_minibatch data points and n_samples of the variables:
  summing over the data points is fine, since the joint is the only
  place in the gradient estimation that touches the data and it
  appears inside a log, so the per-point terms add. Summing over the
  variables does not make sense; they are handled one sample at a
  time.
  """
  x = xs['x']
  pi, mus, sigmas = zs['pi'], zs['mu'], zs['sigma']
  # Shapes: x is [n_minibatch, D]; pi is [K]; mus and sigmas are [K*D].
  log_prior = dirichlet.logpdf(pi, self.alpha)
  log_prior += tf.reduce_sum(norm.logpdf(mus, 0.0, self.c))
  log_prior += tf.reduce_sum(invgamma.logpdf(sigmas, self.a, self.b))

  # log-likelihood is
  # sum_{n=1}^N log sum_{k=1}^K exp( log pi_k + log N(x_n; mu_k, sigma_k) )
  # Create a K x N matrix, whose entry (k, n) is
  # log pi_k + log N(x_n; mu_k, sigma_k). The matrix form lets
  # log_sum_exp() subtract the per-column maximum for stability.
  n_minibatch = get_dims(x)[0]
  matrix = []
  for k in range(self.K):
    matrix += [tf.ones(n_minibatch) * tf.log(pi[k]) +
               multivariate_normal_diag.logpdf(
                   x, mus[(k * self.D):((k + 1) * self.D)],
                   sigmas[(k * self.D):((k + 1) * self.D)])]

  matrix = tf.pack(matrix)
  # log_sum_exp() along the rows is a vector, whose nth
  # element is the log-likelihood of data point x_n.
  vector = log_sum_exp(matrix, 0)
  # Sum over data points to get the full log-likelihood.
  log_lik = tf.reduce_sum(vector)
  return log_prior + log_lik

def entropy(self, alpha):
  """
  Arguments
  ----------
  alpha: np.array or tf.Tensor
    vector or matrix
  """
  alpha = tf.cast(tf.convert_to_tensor(alpha), dtype=tf.float32)
  if len(get_dims(alpha)) == 1:
    K = get_dims(alpha)[0]
    a = tf.reduce_sum(alpha)
    return lbeta(alpha) + \
        tf.mul(a - K, digamma(a)) - \
        tf.reduce_sum(tf.mul(alpha - 1, digamma(alpha)))
  else:
    K = get_dims(alpha)[1]
    a = tf.reduce_sum(alpha, 1)
    return lbeta(alpha) + \
        tf.mul(a - K, digamma(a)) - \
        tf.reduce_sum(tf.mul(alpha - 1, digamma(alpha)), 1)

def logpdf(self, x, mean=None, cov=1):
  """
  Parameters
  ----------
  x : np.array or tf.Tensor
    vector or matrix
  mean : np.array or tf.Tensor, optional
    vector. Defaults to zero mean.
  cov : np.array or tf.Tensor, optional
    vector or matrix. Defaults to identity.
  """
  x = tf.cast(tf.squeeze(tf.convert_to_tensor(x)), dtype=tf.float32)
  x_shape = get_dims(x)
  if len(x_shape) == 1:
    d = x_shape[0]
  else:
    d = x_shape[1]

  if mean is None:
    r = x
  else:
    mean = tf.cast(tf.squeeze(tf.convert_to_tensor(mean)), dtype=tf.float32)
    r = x - mean

  if cov is 1:
    cov_inv = tf.diag(tf.ones([d]))
    det_cov = tf.constant(1.0)
  else:
    cov = tf.cast(tf.squeeze(tf.convert_to_tensor(cov)), dtype=tf.float32)
    if len(cov.get_shape()) == 1:  # vector
      cov_inv = tf.diag(1.0 / cov)
      det_cov = tf.reduce_prod(cov)
    else:  # matrix
      cov_inv = tf.matrix_inverse(cov)
      det_cov = tf.matrix_determinant(cov)

  lps = -0.5 * d * tf.log(2 * np.pi) - 0.5 * tf.log(det_cov)
  if len(x_shape) == 1:
    r = tf.reshape(r, shape=(d, 1))
    lps -= 0.5 * tf.matmul(tf.matmul(r, cov_inv, transpose_a=True), r)
    return tf.squeeze(lps)
  else:
    # TODO vectorize further
    out = []
    for r_vec in tf.unpack(r):
      r_vec = tf.reshape(r_vec, shape=(d, 1))
      out += [tf.squeeze(lps - 0.5 * tf.matmul(
          tf.matmul(r_vec, cov_inv, transpose_a=True), r_vec))]

    return tf.pack(out)

def log_prob(self, xs, zs):
  """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])]."""
  N = get_dims(xs['x'])[0]
  pi, mus, sigmas = self.unpack_params(zs)
  log_prior = dirichlet.logpdf(pi, self.alpha)
  log_prior += tf.reduce_sum(norm.logpdf(mus, 0, np.sqrt(self.c)))
  log_prior += tf.reduce_sum(invgamma.logpdf(sigmas, self.a, self.b))

  # Loop over each mini-batch zs[b,:]
  log_lik = []
  n_minibatch = get_dims(zs)[0]
  for s in range(n_minibatch):
    log_lik_z = N * tf.reduce_sum(tf.log(pi))
    for k in range(self.K):
      log_lik_z += tf.reduce_sum(multivariate_normal.logpdf(
          xs['x'],
          mus[s, (k * self.D):((k + 1) * self.D)],
          sigmas[s, (k * self.D):((k + 1) * self.D)]))

    log_lik += [log_lik_z]

  return log_prior + tf.pack(log_lik)

def log_prob(self, xs, zs):
  """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])]."""
  N = get_dims(xs['x'])[0]
  pi, mus, sigmas = self.unpack_params(zs)
  log_prior = dirichlet.logpdf(pi, self.alpha)
  log_prior += tf.reduce_sum(norm.logpdf(mus, 0, np.sqrt(self.c)))
  log_prior += tf.reduce_sum(invgamma.logpdf(sigmas, self.a, self.b))

  # Loop over each sample zs[b,:]
  log_lik = []
  n_samples = get_dims(zs)[0]
  for s in range(n_samples):
    log_lik_z = N * tf.reduce_sum(tf.log(pi))
    for k in range(self.K):
      log_lik_z += tf.reduce_sum(multivariate_normal.logpdf(
          xs['x'],
          mus[s, (k * self.D):((k + 1) * self.D)],
          sigmas[s, (k * self.D):((k + 1) * self.D)]))

    log_lik += [log_lik_z]

  return log_prior + tf.pack(log_lik)

def logpdf(self, x, mean=None, cov=1):
  """
  Parameters
  ----------
  x : np.array or tf.Tensor
    vector or matrix
  mean : np.array or tf.Tensor, optional
    vector. Defaults to zero mean.
  cov : np.array or tf.Tensor, optional
    vector or matrix. Defaults to identity.
  """
  x = tf.cast(tf.convert_to_tensor(x), dtype=tf.float32)
  x_shape = get_dims(x)
  if len(x_shape) == 1:
    d = x_shape[0]
  else:
    d = x_shape[1]

  if mean is None:
    r = x
  else:
    mean = tf.cast(tf.convert_to_tensor(mean), dtype=tf.float32)
    r = x - mean

  if cov is 1:
    cov_inv = tf.diag(tf.ones([d]))
    det_cov = tf.constant(1.0)
  else:
    cov = tf.cast(tf.convert_to_tensor(cov), dtype=tf.float32)
    if len(cov.get_shape()) == 1:  # vector
      cov_inv = tf.diag(1.0 / cov)
      det_cov = tf.reduce_prod(cov)
    else:  # matrix
      cov_inv = tf.matrix_inverse(cov)
      det_cov = tf.matrix_determinant(cov)

  lps = -0.5 * d * tf.log(2 * np.pi) - 0.5 * tf.log(det_cov)
  if len(x_shape) == 1:
    r = tf.reshape(r, shape=(d, 1))
    lps -= 0.5 * tf.matmul(tf.matmul(r, cov_inv, transpose_a=True), r)
    return tf.squeeze(lps)
  else:
    # TODO vectorize further
    out = []
    for r_vec in tf.unpack(r):
      r_vec = tf.reshape(r_vec, shape=(d, 1))
      out += [tf.squeeze(lps - 0.5 * tf.matmul(
          tf.matmul(r_vec, cov_inv, transpose_a=True), r_vec))]

    return tf.pack(out)

def logpdf(self, x, mean=None, cov=1):
  """
  Arguments
  ----------
  x: tf.Tensor
    vector
  mean: tf.Tensor, optional
    vector. Defaults to zero mean.
  cov: tf.Tensor, optional
    vector or matrix. Defaults to identity.

  Returns
  -------
  tf.Tensor
    scalar
  """
  x = tf.cast(tf.squeeze(x), dtype=tf.float32)
  d = get_dims(x)[0]
  if mean is None:
    r = tf.ones([d]) * x
  else:
    mean = tf.cast(tf.squeeze(mean), dtype=tf.float32)
    r = x - mean

  if cov is 1:
    cov_inv = tf.diag(tf.ones([d]))
    det_cov = tf.constant(1.0)
  else:
    cov = tf.cast(tf.squeeze(cov), dtype=tf.float32)
    if len(cov.get_shape()) == 1:
      cov_inv = tf.diag(1.0 / cov)
      det_cov = tf.reduce_prod(cov)
    else:
      cov_inv = tf.matrix_inverse(cov)
      det_cov = tf.matrix_determinant(cov)

  r = tf.reshape(r, shape=(d, 1))
  lps = -0.5 * d * tf.log(2 * np.pi) - 0.5 * tf.log(det_cov) - \
      0.5 * tf.matmul(tf.matmul(r, cov_inv, transpose_a=True), r)
  """
  # TensorFlow can't reverse-mode autodiff Cholesky
  L = tf.cholesky(cov)
  L_inv = tf.matrix_inverse(L)
  det_cov = tf.pow(tf.matrix_determinant(L), 2)
  inner = dot(L_inv, r)
  out = -0.5*d*tf.log(2*np.pi) - \
        0.5*tf.log(det_cov) - \
        0.5*tf.matmul(tf.transpose(inner), inner)
  """
  return tf.squeeze(lps)

def logpdf(self, x, alpha):
  """
  Parameters
  ----------
  x : np.array or tf.Tensor
    vector or matrix
  alpha : np.array or tf.Tensor
    vector
  """
  x = tf.cast(x, dtype=tf.float32)
  alpha = tf.cast(tf.convert_to_tensor(alpha), dtype=tf.float32)
  if len(get_dims(x)) == 1:
    return -lbeta(alpha) + tf.reduce_sum(tf.mul(alpha - 1, tf.log(x)))
  else:
    return -lbeta(alpha) + tf.reduce_sum(tf.mul(alpha - 1, tf.log(x)), 1)

def logpdf(self, x, mean=None, cov=1):
  """
  Arguments
  ----------
  x: np.array or tf.Tensor
    vector
  mean: np.array or tf.Tensor, optional
    vector. Defaults to zero mean.
  cov: np.array or tf.Tensor, optional
    vector or matrix. Defaults to identity.
  """
  x = tf.cast(tf.squeeze(tf.convert_to_tensor(x)), dtype=tf.float32)
  d = get_dims(x)[0]
  if mean is None:
    r = tf.ones([d]) * x
  else:
    mean = tf.cast(tf.squeeze(tf.convert_to_tensor(mean)), dtype=tf.float32)
    r = x - mean

  if cov is 1:
    cov_inv = tf.diag(tf.ones([d]))
    det_cov = tf.constant(1.0)
  else:
    cov = tf.cast(tf.squeeze(tf.convert_to_tensor(cov)), dtype=tf.float32)
    if len(cov.get_shape()) == 1:
      cov_inv = tf.diag(1.0 / cov)
      det_cov = tf.reduce_prod(cov)
    else:
      cov_inv = tf.matrix_inverse(cov)
      det_cov = tf.matrix_determinant(cov)

  r = tf.reshape(r, shape=(d, 1))
  lps = -0.5 * d * tf.log(2 * np.pi) - 0.5 * tf.log(det_cov) - \
      0.5 * tf.matmul(tf.matmul(r, cov_inv, transpose_a=True), r)
  """
  # TensorFlow can't reverse-mode autodiff Cholesky
  L = tf.cholesky(cov)
  L_inv = tf.matrix_inverse(L)
  det_cov = tf.pow(tf.matrix_determinant(L), 2)
  inner = dot(L_inv, r)
  out = -0.5*d*tf.log(2*np.pi) - \
        0.5*tf.log(det_cov) - \
        0.5*tf.matmul(tf.transpose(inner), inner)
  """
  return tf.squeeze(lps)

def logpdf(self, x, alpha):
  """
  Arguments
  ----------
  x: np.array or tf.Tensor
    vector or matrix
  alpha: np.array or tf.Tensor
    vector
  """
  x = tf.cast(tf.squeeze(x), dtype=tf.float32)
  alpha = tf.cast(tf.squeeze(tf.convert_to_tensor(alpha)), dtype=tf.float32)
  if len(get_dims(x)) == 1:
    return -multivariate_log_beta(alpha) + \
        tf.reduce_sum(tf.mul(alpha - 1, tf.log(x)))
  else:
    return -multivariate_log_beta(alpha) + \
        tf.reduce_sum(tf.mul(alpha - 1, tf.log(x)), 1)

def __init__(self, params, validate_args=False, allow_nan_stats=True,
             name="Empirical"):
  with ops.name_scope(name, values=[params]) as ns:
    with ops.control_dependencies([]):
      self._params = array_ops.identity(params, name="params")
      try:
        self._n = get_dims(self._params)[0]
      except:  # scalar params
        self._n = 1

      super(Empirical, self).__init__(
          dtype=self._params.dtype,
          parameters={"params": self._params, "n": self._n},
          is_continuous=False,
          is_reparameterized=True,
          validate_args=validate_args,
          allow_nan_stats=allow_nan_stats,
          name=ns)

def logpmf(self, x, n, p):
  """
  Parameters
  ----------
  x : np.array or tf.Tensor
    vector of length K, where x[i] is the number of outcomes
    in the ith bucket, or matrix with column length K
  n : int or tf.Tensor
    number of outcomes equal to sum x[i]
  p : np.array or tf.Tensor
    vector of probabilities summing to 1
  """
  x = tf.cast(x, dtype=tf.float32)
  n = tf.cast(n, dtype=tf.float32)
  p = tf.cast(p, dtype=tf.float32)
  if len(get_dims(x)) == 1:
    return lgamma(n + 1.0) - \
        tf.reduce_sum(lgamma(x + 1.0)) + \
        tf.reduce_sum(tf.mul(x, tf.log(p)))
  else:
    return lgamma(n + 1.0) - \
        tf.reduce_sum(lgamma(x + 1.0), 1) + \
        tf.reduce_sum(tf.mul(x, tf.log(p)), 1)

def entropy(self, mean=None, cov=1):
  """
  Note entropy does not depend on its mean.

  Parameters
  ----------
  mean : np.array or tf.Tensor, optional
    vector. Defaults to zero mean.
  cov : np.array or tf.Tensor, optional
    vector or matrix. Defaults to identity.
  """
  if cov is 1:
    d = 1
    det_cov = 1.0
  else:
    cov = tf.cast(tf.convert_to_tensor(cov), dtype=tf.float32)
    d = get_dims(cov)[0]
    if len(cov.get_shape()) == 1:
      det_cov = tf.reduce_prod(cov)
    else:
      det_cov = tf.matrix_determinant(cov)

  return 0.5 * (d + d * tf.log(2 * np.pi) + tf.log(det_cov))

def logpmf(self, x, n, p):
  """
  Parameters
  ----------
  x : np.array or tf.Tensor
    vector of length K, where x[i] is the number of outcomes
    in the ith bucket, or matrix with column length K
  n : int or tf.Tensor
    number of outcomes equal to sum x[i]
  p : np.array or tf.Tensor
    vector of probabilities summing to 1
  """
  x = tf.cast(tf.squeeze(x), dtype=tf.float32)
  n = tf.cast(tf.squeeze(n), dtype=tf.float32)
  p = tf.cast(tf.squeeze(p), dtype=tf.float32)
  if len(get_dims(x)) == 1:
    return lgamma(n + 1.0) - \
        tf.reduce_sum(lgamma(x + 1.0)) + \
        tf.reduce_sum(tf.mul(x, tf.log(p)))
  else:
    return lgamma(n + 1.0) - \
        tf.reduce_sum(lgamma(x + 1.0), 1) + \
        tf.reduce_sum(tf.mul(x, tf.log(p)), 1)

def entropy(self, mean=None, cov=1):
  """
  Note entropy does not depend on its mean.

  Parameters
  ----------
  mean : np.array or tf.Tensor, optional
    vector. Defaults to zero mean.
  cov : np.array or tf.Tensor, optional
    vector or matrix. Defaults to identity.
  """
  if cov is 1:
    d = 1
    det_cov = 1.0
  else:
    cov = tf.cast(tf.convert_to_tensor(cov), dtype=tf.float32)
    d = get_dims(cov)[0]
    if len(cov.get_shape()) == 1:
      det_cov = tf.reduce_prod(cov)
    else:
      det_cov = tf.matrix_determinant(cov)

  return 0.5 * (d + d * tf.log(2 * np.pi) + tf.log(det_cov))

def _test(alpha, beta, n):
  x = InverseGamma(alpha=alpha, beta=beta)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(alpha)
  assert val_est == val_true

def __init__(self, mu, sigma):
  self.mu = mu
  self.sigma = sigma
  self.n_vars = get_dims(mu)[0]

def _test(shape, a, b, n):
  x = Beta(shape, a, b)
  val_est = tuple(get_dims(x.sample(n)))
  val_true = (n, ) + shape
  assert val_est == val_true

def logpdf(self, x, loc=0, scale=1):
  # Note there is no error checking if x is outside domain.
  scale = tf.cast(scale, dtype=tf.float32)
  return tf.squeeze(tf.ones(get_dims(x)) * -tf.log(scale))

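# A uniform distribution on [loc, loc + scale] has constant density 1 / scale,
# so the log-density is -log(scale) wherever x lies inside the support; the
# tf.ones(get_dims(x)) factor only broadcasts that constant to x's shape.
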
def _test(shape, loc, scale, n):
  x = Normal(shape, loc, scale)
  val_est = tuple(get_dims(x.sample(n)))
  val_true = (n, ) + shape
  assert val_est == val_true

def _test(a, b, n):
  x = Uniform(a=a, b=b)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(a)
  assert val_est == val_true

def _test(p, n):
  x = Bernoulli(p=p)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(p)
  assert val_est == val_true

def _test(logits, n):
  x = Categorical(logits=logits)
  val_est = get_dims(x.sample(n))
  val_true = n + get_dims(logits)[:-1]
  assert val_est == val_true

def _test(shape, a, b, size):
  x = Beta(shape, a, b)
  val_est = tuple(get_dims(x.sample(size=size)))
  val_true = (size, ) + shape
  assert val_est == val_true