Example #1
 def testInvalidShapeAtEval(self):
     with self.test_session(use_gpu=self._use_gpu):
         v = tf.placeholder(dtype=tf.float32)
         with self.assertRaisesOpError("input must be at least 2-dim"):
             tf.batch_matrix_diag_part(v).eval(feed_dict={v: 0.0})
         with self.assertRaisesOpError("last two dimensions must be equal"):
             tf.batch_matrix_diag_part(v).eval(
                 feed_dict={v: [[0, 1], [1, 0], [0, 0]]})
Example #2
 def testInvalidShapeAtEval(self):
   with self.test_session(use_gpu=self._use_gpu):
     v = tf.placeholder(dtype=tf.float32)
     with self.assertRaisesOpError("input must be at least 2-dim"):
       tf.batch_matrix_diag_part(v).eval(feed_dict={v: 0.0})
     with self.assertRaisesOpError("last two dimensions must be equal"):
       tf.batch_matrix_diag_part(v).eval(
           feed_dict={v: [[0, 1], [1, 0], [0, 0]]})
Example #3
    def testSample(self):
        with self.test_session():
            scale = make_pd(1., 2)
            df = 4

            chol_w = distributions.WishartCholesky(
                df, chol(scale), cholesky_input_output_matrices=False)

            x = chol_w.sample_n(1, seed=42).eval()
            chol_x = [chol(x[0])]

            full_w = distributions.WishartFull(
                df, scale, cholesky_input_output_matrices=False)
            self.assertAllClose(x, full_w.sample_n(1, seed=42).eval())

            chol_w_chol = distributions.WishartCholesky(
                df, chol(scale), cholesky_input_output_matrices=True)
            self.assertAllClose(chol_x,
                                chol_w_chol.sample_n(1, seed=42).eval())
            eigen_values = tf.batch_matrix_diag_part(
                chol_w_chol.sample_n(1000, seed=42))
            np.testing.assert_array_less(0., eigen_values.eval())

            full_w_chol = distributions.WishartFull(
                df, scale, cholesky_input_output_matrices=True)
            self.assertAllClose(chol_x,
                                full_w_chol.sample_n(1, seed=42).eval())
            eigen_values = tf.batch_matrix_diag_part(
                full_w_chol.sample_n(1000, seed=42))
            np.testing.assert_array_less(0., eigen_values.eval())

            # Check first and second moments.
            df = 4.
            chol_w = distributions.WishartCholesky(
                df=df,
                scale=chol(make_pd(1., 3)),
                cholesky_input_output_matrices=False)
            x = chol_w.sample_n(10000, seed=42)
            self.assertAllEqual((10000, 3, 3), x.get_shape())

            moment1_estimate = tf.reduce_mean(x, reduction_indices=[0]).eval()
            self.assertAllClose(chol_w.mean().eval(),
                                moment1_estimate,
                                rtol=0.05)

            # The Variance estimate uses the squares rather than outer-products
            # because Wishart.Variance is the diagonal of the Wishart covariance
            # matrix.
            variance_estimate = (
                tf.reduce_mean(tf.square(x), reduction_indices=[0]) -
                tf.square(moment1_estimate)).eval()
            self.assertAllClose(chol_w.variance().eval(),
                                variance_estimate,
                                rtol=0.05)
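A note on the variance check above (not in the original source): because Wishart.variance() returns elementwise variances (the diagonal of the full covariance, as the comment says), the estimator compares it against the elementwise second moment minus the squared first moment,

    Var_hat[X_ij] = mean_s(x_s[i, j]^2) - (mean_s(x_s[i, j]))^2,

computed over the 10000 samples, rather than forming full outer-product covariances.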
Example #4
  def testSample(self):
    with self.test_session():
      scale = make_pd(1., 2)
      df = 4

      chol_w = distributions.WishartCholesky(
          df, chol(scale), cholesky_input_output_matrices=False)

      x = chol_w.sample_n(1, seed=42).eval()
      chol_x = [chol(x[0])]

      full_w = distributions.WishartFull(
          df, scale, cholesky_input_output_matrices=False)
      self.assertAllClose(x, full_w.sample_n(1, seed=42).eval())

      chol_w_chol = distributions.WishartCholesky(
          df, chol(scale), cholesky_input_output_matrices=True)
      self.assertAllClose(chol_x, chol_w_chol.sample_n(1, seed=42).eval())
      eigen_values = tf.batch_matrix_diag_part(
          chol_w_chol.sample_n(1000, seed=42))
      np.testing.assert_array_less(0., eigen_values.eval())

      full_w_chol = distributions.WishartFull(
          df, scale, cholesky_input_output_matrices=True)
      self.assertAllClose(chol_x, full_w_chol.sample_n(1, seed=42).eval())
      eigen_values = tf.batch_matrix_diag_part(
          full_w_chol.sample_n(1000, seed=42))
      np.testing.assert_array_less(0., eigen_values.eval())

      # Check first and second moments.
      df = 4.
      chol_w = distributions.WishartCholesky(
          df=df,
          scale=chol(make_pd(1., 3)),
          cholesky_input_output_matrices=False)
      x = chol_w.sample_n(10000, seed=42)
      self.assertAllEqual((10000, 3, 3), x.get_shape())

      moment1_estimate = tf.reduce_mean(x, reduction_indices=[0]).eval()
      self.assertAllClose(chol_w.mean().eval(),
                          moment1_estimate,
                          rtol=0.05)

      # The Variance estimate uses the squares rather than outer-products
      # because Wishart.Variance is the diagonal of the Wishart covariance
      # matrix.
      variance_estimate = (
          tf.reduce_mean(tf.square(x), reduction_indices=[0]) -
          tf.square(moment1_estimate)).eval()
      self.assertAllClose(chol_w.variance().eval(),
                          variance_estimate,
                          rtol=0.05)
Example #5
def gauss_kl_white(q_mu, q_sqrt):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, I)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance.
    """
    KL = 0.5 * tf.reduce_sum(tf.square(q_mu))  # Mahalanobis term
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]),
                         tf.float64)  # constant term
    L = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                  0)  # force lower triangle
    KL -= 0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.batch_matrix_diag_part(L))))  # logdet
    KL += 0.5 * tf.reduce_sum(tf.square(L))  # Trace term.
    return KL
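Reading the four terms off gauss_kl_white above (Mahalanobis, constant, logdet, trace), the function accumulates the standard closed-form KL between N(q_mu, S) and N(0, I) for each independent column, with S = L L^T built from the lower-triangular slices of q_sqrt and D the dimension of each column:

    KL( N(q_mu, S) || N(0, I) )
        = 0.5 * ( q_mu^T q_mu  -  D  -  log det S  +  tr(S) ),

where log det S = sum_i log(L_ii^2) and tr(S) = sum_ij L_ij^2, summed over all columns.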
Example #6
def gauss_kl(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    num_latent = tf.cast(tf.shape(q_sqrt)[2], tf.float64)
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]),
                         tf.float64)  # constant term
    Lq = tf.batch_matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                   0)  # force lower triangle
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.batch_matrix_diag_part(Lq))))  # logdet
    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.pack([tf.shape(Lq)[0], 1, 1]))
    LiLq = tf.batch_matrix_triangular_solve(L_tiled, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
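gauss_kl is the same bound with a non-identity prior covariance K (a sketch of the identity, not part of the original docstring): for each latent column r, with S_r = L_r L_r^T from q_sqrt and D the number of rows,

    KL( N(q_mu_r, S_r) || N(0, K) )
        = 0.5 * ( q_mu_r^T K^-1 q_mu_r  +  log det K  -  D  -  log det S_r  +  tr(K^-1 S_r) ).

In the code, alpha = L^-1 q_mu supplies the Mahalanobis term, the diagonal of the prior Cholesky L supplies log det K (multiplied by num_latent because K is shared across the columns), and LiLq = L^-1 L_r supplies the trace term as a sum of squares.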
Example #7
 def testMatrix(self):
   with self.test_session(use_gpu=self._use_gpu):
     v = np.array([1.0, 2.0, 3.0])
     mat = np.diag(v)
     mat_diag = tf.batch_matrix_diag_part(mat)
     self.assertEqual((3,), mat_diag.get_shape())
     self.assertAllEqual(mat_diag.eval(), v)
Example #8
 def testMatrix(self):
     with self.test_session(use_gpu=self._use_gpu):
         v = np.array([1.0, 2.0, 3.0])
         mat = np.diag(v)
         mat_diag = tf.batch_matrix_diag_part(mat)
         self.assertEqual((3, ), mat_diag.get_shape())
         self.assertAllEqual(mat_diag.eval(), v)
Example #9
 def testGrad(self):
   shapes = ((3, 3), (5, 3, 3))
   with self.test_session(use_gpu=self._use_gpu):
     for shape in shapes:
       x = tf.constant(np.random.rand(*shape), dtype=np.float32)
       y = tf.batch_matrix_diag_part(x)
       error = tf.test.compute_gradient_error(x, x.get_shape().as_list(),
                                              y, y.get_shape().as_list())
       self.assertLess(error, 1e-4)
Example #10
 def testGrad(self):
   shapes = ((3, 3), (5, 3, 3))
   with self.test_session(use_gpu=self._use_gpu):
     for shape in shapes:
       x = tf.constant(np.random.rand(*shape), dtype=np.float32)
       y = tf.batch_matrix_diag_part(x)
       error = tf.test.compute_gradient_error(x, x.get_shape().as_list(),
                                              y, y.get_shape().as_list())
       self.assertLess(error, 1e-4)
Example #11
def vec2lower_triangle(vec, dim):
    """
    Convert a vector M of size (n * m) into a lower-triangular matrix of
    shape (n, m) whose diagonal is exponentiated:
    [[e^M[0],    0,           0,             ...,    0]
     [M[n-1],    e^M[n],      0,      0,     ...,    0]
     [M[2n-1],   M[2n],       e^M[2n+1], 0,  ...,    0]
     ...
     [M[m(n-1)], M[m(n-1)+1], ...,       M[mn-2], e^M[mn-1]]]
    """
    L = tf.reshape(vec, [-1, dim, dim])
    if int(tf.__version__.split('.')[1]) >= 10:
        L = tf.matrix_band_part(L, -1, 0) - tf.matrix_diag(
            tf.matrix_diag_part(L)) + tf.matrix_diag(
                tf.exp(tf.matrix_diag_part(L)))
    else:
        L = tf.batch_matrix_band_part(L, -1, 0) - tf.batch_matrix_diag(
            tf.batch_matrix_diag_part(L)) + tf.batch_matrix_diag(
                tf.exp(tf.batch_matrix_diag_part(L)))
    return L
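As a rough single-matrix illustration of the same transform (an added sketch, not part of the original source; vec2lower_triangle_np is a hypothetical NumPy helper and ignores the batching that the TF version handles via the leading -1 dimension):

import numpy as np

def vec2lower_triangle_np(vec, dim):
    # Reshape the flat vector into (dim, dim), keep the lower triangle,
    # and exponentiate the diagonal, mirroring the TF graph above.
    L = np.tril(np.reshape(vec, (dim, dim)))
    np.fill_diagonal(L, np.exp(np.diag(L)))
    return L

# Example: vec2lower_triangle_np(np.arange(9.0), 3) keeps [3, 6, 7] below the
# diagonal and replaces the diagonal [0, 4, 8] with exp([0, 4, 8]).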
Example #12
 def testBatchMatrix(self):
     with self.test_session(use_gpu=self._use_gpu):
         v_batch = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
         mat_batch = np.array([[[1.0, 0.0, 0.0], [0.0, 2.0, 0.0],
                                [0.0, 0.0, 3.0]],
                               [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0],
                                [0.0, 0.0, 6.0]]])
         self.assertEqual(mat_batch.shape, (2, 3, 3))
         mat_batch_diag = tf.batch_matrix_diag_part(mat_batch)
         self.assertEqual((2, 3), mat_batch_diag.get_shape())
         self.assertAllEqual(mat_batch_diag.eval(), v_batch)
Example #13
 def testBatchMatrix(self):
   with self.test_session(use_gpu=self._use_gpu):
     v_batch = np.array([[1.0, 2.0, 3.0],
                         [4.0, 5.0, 6.0]])
     mat_batch = np.array(
         [[[1.0, 0.0, 0.0],
           [0.0, 2.0, 0.0],
           [0.0, 0.0, 3.0]],
          [[4.0, 0.0, 0.0],
           [0.0, 5.0, 0.0],
           [0.0, 0.0, 6.0]]])
     self.assertEqual(mat_batch.shape, (2, 3, 3))
     mat_batch_diag = tf.batch_matrix_diag_part(mat_batch)
     self.assertEqual((2, 3), mat_batch_diag.get_shape())
     self.assertAllEqual(mat_batch_diag.eval(), v_batch)
Example #14
 def _sample(self, N):
     """
     :param integer N: number of samples
      :Returns:
       samples picked from the variational posterior.
       The Kullback-Leibler divergence is stored as self._KL.
     """
     n = self.num_data
     R = self.num_latent
     # Match dimension of the posterior variance to the data.
     if self.q_diag:
         sqrt = tf.batch_matrix_diag(tf.transpose(self.q_sqrt)) # [R,n,n]
     else:
         sqrt = tf.batch_matrix_band_part(
                         tf.transpose(self.q_sqrt,[2,0,1]), -1, 0) # [R,n,n]
     # Log determinant of matrix S = q_sqrt * q_sqrt^T
     logdet_S = tf.cast(N, float_type)*tf.reduce_sum(
             tf.log(tf.square(tf.batch_matrix_diag_part(sqrt))))
     sqrt = tf.tile(tf.expand_dims(sqrt, 1), [1,N,1,1]) # [R,N,n,n]
      # normal random samples, [R,N,n,1]
     v_samples = tf.random_normal([R,N,n,1], dtype=float_type)
     # Match dimension of the posterior mean, [R,N,n,1]
     mu = tf.tile(tf.expand_dims(tf.expand_dims(
                             tf.transpose(self.q_mu), 1), -1), [1,N,1,1])
     u_samples = mu + tf.batch_matmul(sqrt, v_samples)
      # Stochastic approximation of the Kullback-Leibler divergence KL[q(f)||p(f)]
     self._KL = - 0.5 * logdet_S\
          - 0.5 * tf.reduce_sum(tf.square(v_samples)) \
          + 0.5 * tf.reduce_sum(tf.square(u_samples))
     # Cholesky factor of kernel [R,N,n,n]
     L = tf.tile(tf.expand_dims(
             tf.transpose(self.kern.Cholesky(self.X), [2,0,1]),1), [1,N,1,1])
     # mean, sized [N,n,R]
     mean = tf.tile(tf.expand_dims(
                 self.mean_function(self.X),
             0), [N,1,1])
     # sample from posterior, [N,n,R]
     f_samples = tf.transpose(
             tf.squeeze(tf.batch_matmul(L, u_samples),[-1]), # [R,N,n]
             [1,2,0]) + mean
      # return the posterior samples, shaped [N, n, R]
     return f_samples
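Reading the three terms stored in self._KL (an added note, not from the original source): the samples are u = mu + sqrt v with v ~ N(0, I), so u ~ q(u) = N(mu, S) with S = sqrt sqrt^T, and the whitened prior is p(u) = N(0, I); for a single sample,

    log q(u) - log p(u) = -0.5 * log det S  -  0.5 * ||v||^2  +  0.5 * ||u||^2

(the 0.5 * D * log(2 pi) constants cancel). Summing over the N samples and the R latent functions yields exactly the three sums above, so self._KL holds the per-sample estimates of KL[q||p] summed rather than averaged over N.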
Example #15
  def _define_full_covariance_probs(self, shard_id, shard):
    """Defines the full covariance probabilties per example in a class.

    Updates a matrix with dimension num_examples X num_classes.

    Args:
      shard_id: id of the current shard.
      shard: current data shard, 1 X num_examples X dimensions.
    """
    diff = shard - self._means
    cholesky = tf.batch_cholesky(self._covs + self._min_var)
    log_det_covs = 2.0 * tf.reduce_sum(tf.log(
        tf.batch_matrix_diag_part(cholesky)), 1)
    x_mu_cov = tf.square(tf.batch_matrix_triangular_solve(
        cholesky, tf.transpose(diff, perm=[0, 2, 1]),
        lower=True))
    diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
    self._probs[shard_id] = -0.5 * (
        diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) +
        log_det_covs)
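For reference (an added note, not part of the original code), the expression assembled above is the batch of multivariate normal log-densities

    log N(x | mu, Sigma) = -0.5 * ( (x - mu)^T Sigma^-1 (x - mu)  +  d * log(2 pi)  +  log det Sigma ),

with the Mahalanobis term obtained from the Cholesky triangular solve (diag_m) and log det Sigma = 2 * sum(log(diag(cholesky))) (log_det_covs).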
Example #16
    def _define_full_covariance_probs(self, shard_id, shard):
        """Defines the full covariance probabilties per example in a class.

    Updates a matrix with dimension num_examples X num_classes.

    Args:
      shard_id: id of the current shard.
      shard: current data shard, 1 X num_examples X dimensions.
    """
        diff = shard - self._means
        cholesky = tf.cholesky(self._covs + self._min_var)
        log_det_covs = 2.0 * tf.reduce_sum(
            tf.log(tf.batch_matrix_diag_part(cholesky)), 1)
        x_mu_cov = tf.square(
            tf.matrix_triangular_solve(cholesky,
                                       tf.transpose(diff, perm=[0, 2, 1]),
                                       lower=True))
        diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
        self._probs[shard_id] = -0.5 * (
            diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) +
            log_det_covs)
Example #17
    def build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R

        This method computes the variational lower bound on the likelihood,
        which is:

            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]

        with

            q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .

        """
        K = self.kern.K(self.X)
        K_alpha = tf.matmul(K, self.q_alpha)
        f_mean = K_alpha + self.mean_function(self.X)

        # compute the variance for each of the outputs
        I = tf.tile(tf.expand_dims(eye(self.num_data), 0), [self.num_latent, 1, 1])
        A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
            tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
        L = tf.batch_cholesky(A)
        Li = tf.batch_matrix_triangular_solve(L, I)
        tmp = Li / tf.transpose(self.q_lambda)
        f_var = 1./tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

        # some statistics about A are used in the KL
        A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.batch_matrix_diag_part(L)))
        trAi = tf.reduce_sum(tf.square(Li))

        KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent
                    + tf.reduce_sum(K_alpha*self.q_alpha))

        v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
        return tf.reduce_sum(v_exp) - KL
Example #18
 def testInvalidShape(self):
     with self.assertRaisesRegexp(ValueError, "must have rank at least 2"):
         tf.batch_matrix_diag_part(0)
     with self.assertRaisesRegexp(ValueError,
                                  r"Dimensions .* not compatible"):
         tf.batch_matrix_diag_part([[0, 1], [1, 0], [0, 0]])
Example #19
# Declare k-value and batch size
k = 4
batch_size=len(x_vals_test)

# Placeholders
x_data_train = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
x_data_test = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
y_target_train = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y_target_test = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Declare weighted distance metric
# Weighted - L2 = sqrt((x-y)^T * A * (x-y))
subtraction_term =  tf.subtract(x_data_train, tf.expand_dims(x_data_test,1))
first_product = tf.matmul(subtraction_term, tf.tile(tf.expand_dims(weight_matrix,0), [batch_size,1,1]))
second_product = tf.matmul(first_product, tf.transpose(subtraction_term, perm=[0,2,1]))
distance = tf.sqrt(tf.batch_matrix_diag_part(second_product))

# Predict: Get min distance index (Nearest neighbor)
top_k_xvals, top_k_indices = tf.nn.top_k(tf.negative(distance), k=k)
x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1),1)
x_sums_repeated = tf.matmul(x_sums,tf.ones([1, k], tf.float32))
x_val_weights = tf.expand_dims(tf.div(top_k_xvals,x_sums_repeated), 1)

top_k_yvals = tf.gather(y_target_train, top_k_indices)
prediction = tf.squeeze(tf.matmul(x_val_weights,top_k_yvals), axis=[1])

# Calculate MSE
mse = tf.div(tf.reduce_sum(tf.square(tf.subtract(prediction, y_target_test))), batch_size)

# Calculate how many loops over training data
num_loops = int(np.ceil(len(x_vals_test)/batch_size))
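Why batch_matrix_diag_part appears in this snippet (an added note): for each test point, second_product = (X_train - x_test) A (X_train - x_test)^T is an n_train x n_train matrix whose off-diagonal entries pair different training rows; only the diagonal holds the quantities of interest,

    distance_i = sqrt( (x_i - x_test)^T A (x_i - x_test) )   for each training row x_i,

so the diagonal is extracted before taking the square root.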
Example #20
 def testInvalidShape(self):
   with self.assertRaisesRegexp(ValueError, "must have rank at least 2"):
     tf.batch_matrix_diag_part(0)
   with self.assertRaisesRegexp(ValueError, r"Dimensions .* not compatible"):
     tf.batch_matrix_diag_part([[0, 1], [1, 0], [0, 0]])
Example #21
 def testInvalidShape(self):
   with self.assertRaisesRegexp(ValueError, "must be at least rank 2"):
     tf.batch_matrix_diag_part(0)
   with self.assertRaisesRegexp(ValueError, r"Dimensions must be equal"):
     tf.batch_matrix_diag_part([[0, 1], [1, 0], [0, 0]])
Example #22
def vec2trimat(vec, dim):
    L = tf.reshape(vec, [-1, dim, dim])
    L = tf.batch_matrix_band_part(L, -1, 0) - tf.batch_matrix_diag(tf.batch_matrix_diag_part(L)) + \
        tf.batch_matrix_diag(tf.exp(tf.batch_matrix_diag_part(L)))
    return L
Example #23
Out[6]: array([1, 2, 3, 1, 2, 3], dtype=int32)
sess.run(tp1)
Out[7]: 
array([[1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6],
       [1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6]], dtype=int32)
"""


# Declare weighted distance metric
# Weighted - L2 = sqrt((x-y)^T * A * (x-y))
subtraction_term =  tf.sub(x_data_train, tf.expand_dims(x_data_test,1))
first_product = tf.batch_matmul(subtraction_term, tf.tile(tf.expand_dims(weight_matrix,0), [batch_size,1,1]))
second_product = tf.batch_matmul(first_product, tf.transpose(subtraction_term, perm=[0,2,1]))
distance = tf.sqrt(tf.batch_matrix_diag_part(second_product))

# Predict: Get min distance index (Nearest neighbor)
top_k_xvals, top_k_indices = tf.nn.top_k(tf.neg(distance), k=k)
x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1),1)
x_sums_repeated = tf.matmul(x_sums,tf.ones([1, k], tf.float32))
x_val_weights = tf.expand_dims(tf.div(top_k_xvals,x_sums_repeated), 1)

top_k_yvals = tf.gather(y_target_train, top_k_indices)
prediction = tf.squeeze(tf.batch_matmul(x_val_weights,top_k_yvals), squeeze_dims=[1])

# Calculate MSE
mse = tf.div(tf.reduce_sum(tf.square(tf.sub(prediction, y_target_test))), batch_size)

# Calculate how many loops over training data
num_loops = int(np.ceil(len(x_vals_test)/batch_size))