Example 1
 def testTTMatTimesTTMatBroadcasting(self):
   # Multiply a batch of TT-matrices by another batch of TT-matrices with
   # broadcasting.
   left_shape = (2, 3)
   sum_shape = (4, 3)
   right_shape = (4, 4)
   with self.test_session() as sess:
     tt_mat_1 = initializers.random_matrix_batch((left_shape, sum_shape),
                                                 tt_rank=3, batch_size=3,
                                                 dtype=self.dtype)
     tt_mat_2 = initializers.random_matrix_batch((sum_shape, right_shape),
                                                 dtype=self.dtype)
     # TT-batch by one element TT-batch
     res_actual = ops.matmul(tt_mat_1, tt_mat_2)
     res_actual = ops.full(res_actual)
     # TT by TT-batch.
     res_actual2 = ops.matmul(ops.transpose(tt_mat_2[0]),
                              ops.transpose(tt_mat_1))
     res_actual2 = ops.full(ops.transpose(res_actual2))
     res_desired = tf.einsum('oij,jk->oik', ops.full(tt_mat_1),
                             ops.full(tt_mat_2[0]))
     to_run = [res_actual, res_actual2, res_desired]
     res_actual_val, res_actual2_val, res_desired_val = sess.run(to_run)
     self.assertAllClose(res_actual_val, res_desired_val, atol=1e-5, rtol=1e-5)
     self.assertAllClose(res_actual2_val, res_desired_val, atol=1e-5,
                         rtol=1e-5)
Example 2
  def testHessianVectorProduct(self):
    w = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    A = initializers.random_matrix(([5] * 3, [5] * 3), dtype=self.dtype)
    x = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    z = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    projected_vector = riemannian.project(z, x)

    def func1(x):
      return 0.5 * ops.flat_inner(x, w) ** 2
    # Grad: <x, w> w
    # Hessian: w w.T
    # Hessian by vector: w <w, P_x z>
    desired1 = riemannian.project(w * ops.flat_inner(projected_vector, w), x)
    desired1 = ops.full(desired1)
    self._TestSingleHessianByVector(func1, x, z, desired1)

    def func2(x):
      return ops.bilinear_form(A, x, x)
    # Hessian of <x, Ax> is A + A.T
    hessian_by_vector = ops.matmul(ops.transpose(A) + A, projected_vector)
    desired2 = ops.full(riemannian.project(hessian_by_vector, x))
    self._TestSingleHessianByVector(func2, x, z, desired2)

    def func3(x):
      # A function which is not invariant to different representations of the
      # same tensor, i.e. it does not even have a Riemannian gradient or
      # hessian.
      return tf.add_n([tf.reduce_sum(c) for c in x.tt_cores]) ** 2
    with self.assertRaises(tf.errors.InvalidArgumentError):
      actual3 = ops.full(autodiff.hessian_vector_product(func3, x, z))
      self.evaluate(actual3)
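A worked check of the comments above (standard multivariate calculus, our addition, not part of the test): for f(x) = 0.5 * <x, w>^2 the Euclidean derivatives are

  \nabla f(x) = \langle x, w \rangle \, w, \qquad \nabla^2 f(x) = w w^\top,

so the Riemannian Hessian-by-vector product at x in the direction z is P_x(w \langle w, P_x z \rangle), where P_x is the projection onto the tangent space of the TT-manifold at x (riemannian.project). This is exactly what desired1 constructs.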
Example 3
  def _get_mu(self, ranks, x, y):
    """Initializes latent inputs expectations mu.

    Either loads pretrained values of tt-cores of mu, or initializes it
    according to optimal formulas from the given data.

    Args:
      ranks: tt-ranks of mu
      x: features of a batch of objects
      y: targets of a batch of objects
    """
    # TODO: test if this is needed.
    w = self.inputs.interpolate_on_batch(self.cov.project(x))
    Sigma = ops.tt_tt_matmul(self.sigma_l, ops.transpose(self.sigma_l))
   temp = ops.tt_tt_matmul(w, y)
   anc = ops.tt_tt_matmul(Sigma, temp)
   res = TensorTrain([core[0, :, :, :, :] for core in anc.tt_cores],
                     tt_ranks=[1] * (anc.ndims() + 1))
    for i in range(1, anc.get_shape()[0]):
      elem = TensorTrain([core[i, :, :, :, :] for core in anc.tt_cores],
              tt_ranks=[1]*(anc.ndims()+1))
      res = ops.add(res, elem)
    mu_ranks = [1] + [ranks] * (res.ndims() - 1) + [1]
    return t3f.get_variable('tt_mu', initializer=TensorTrain(res.tt_cores, 
                                res.get_raw_shape(), mu_ranks))
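A reading of the code above in matrix notation (our summary, not a formula from the source): with \Sigma = \sigma_L \sigma_L^\top, the slice-and-add loop materializes the batch sum

  \mu_{\text{init}} = \sum_i \bigl( \Sigma \, w \, y \bigr)_i,

re-wrapping each batch element of anc as a standalone TensorTrain and accumulating the elements with ops.add.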
Example 4
    def _get_mus(self, mu_ranks):
        """Initialize expectations of var distribution over unary potentials.
       
    Args:
      mu_ranks: TT-ranks of mus.
    """

        # TODO: is this a good initialization?
        x_init = tf.random_normal([mu_ranks, self.d], dtype=tf.float64)
        y_init = tf.random_normal([mu_ranks], dtype=tf.float64)

        w = self.inputs.interpolate_on_batch(x_init)
        y_init_cores = [tf.reshape(y_init, (-1, 1, 1, 1, 1))]
        for core_idx in range(1, w.ndims()):
            y_init_cores += [tf.ones((mu_ranks, 1, 1, 1, 1), dtype=tf.float64)]
        y_init = t3f.TensorTrainBatch(y_init_cores)

        Sigma = ops.tt_tt_matmul(self.sigma_ls[0],
                                 ops.transpose(self.sigma_ls[0]))
        res_batch = t3f.tt_tt_matmul(Sigma, t3f.tt_tt_matmul(w, y_init))
        res = res_batch[0]
        for i in range(1, mu_ranks):
            res = res + res_batch[i]

        mu_ranks = [1] + [mu_ranks] * (res.ndims() - 1) + [1]
        mu_cores = []
        for core in res.tt_cores:
            mu_cores.append(
                tf.tile(core[None, ...], [self.n_labels, 1, 1, 1, 1]))
        return t3f.get_variable('tt_mus',
                                initializer=TensorTrainBatch(
                                    mu_cores, res.get_raw_shape(), mu_ranks))
Example 5
    def _unary_complexity_penalty(self):
        """Computes the complexity penalty for unary potentials.

    This function computes KL-divergence between prior and variational 
    distribution over the values of GPs at inducing inputs.

    Returns:
      A scalar `tf.Tensor` containing the complexity penalty for GPs 
      determining unary potentials.
    """
        # TODO: test this
        mus = self.mus
        sigma_ls = _kron_tril(self.sigma_ls)
        sigmas = ops.tt_tt_matmul(sigma_ls, ops.transpose(sigma_ls))
        sigmas_logdet = _kron_logdet(sigma_ls)

        K_mms = self._K_mms()
        K_mms_inv = kron.inv(K_mms)
        K_mms_logdet = kron.slog_determinant(K_mms)[1]

        penalty = 0
        penalty += -K_mms_logdet
        penalty += sigmas_logdet
        penalty += -ops.tt_tt_flat_inner(sigmas, K_mms_inv)
        penalty += -ops.tt_tt_flat_inner(mus, ops.tt_tt_matmul(K_mms_inv, mus))
        return tf.reduce_sum(penalty) / 2
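For reference, the textbook identity this implements (not quoted from the source): for q = N(\mu, \Sigma) and the prior p = N(0, K_{mm}),

  \mathrm{KL}(q \,\|\, p) = \tfrac{1}{2} \bigl( \operatorname{tr}(K_{mm}^{-1} \Sigma) + \mu^\top K_{mm}^{-1} \mu - m + \ln\det K_{mm} - \ln\det \Sigma \bigr),

so, up to the additive constant m/2, the returned penalty equals -KL(q || p): the two flat-inner terms implement \operatorname{tr}(K_{mm}^{-1} \Sigma) = \langle \Sigma, K_{mm}^{-1} \rangle_F and \mu^\top K_{mm}^{-1} \mu.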
Example 6
 def testBilinearFormTwoMat(self):
     # Test bilinear_form_two_mat.
     shape_list = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2)))
     rank_list = (1, 2)
     for tensor_shape in shape_list:
         for rank in rank_list:
             A = initializers.random_matrix(tensor_shape,
                                            tt_rank=rank,
                                            dtype=self.dtype)
             B = initializers.random_matrix(tensor_shape,
                                            tt_rank=rank,
                                            dtype=self.dtype)
             B = ops.transpose(B)
             x = initializers.random_matrix((tensor_shape[0], None),
                                            tt_rank=rank,
                                            dtype=self.dtype)
             y = initializers.random_matrix((tensor_shape[0], None),
                                            tt_rank=rank,
                                            dtype=self.dtype)
             res_actual = ops.bilinear_form_two_mat(x, A, B, y)
             vars = [
                 res_actual,
                 ops.full(x),
                 ops.full(A),
                 ops.full(B),
                 ops.full(y)
             ]
             res_actual_val, x_val, A_val, B_val, y_val = self.evaluate(
                 vars)
             res_desired = x_val.T.dot(A_val).dot(B_val).dot(y_val)
             self.assertAllClose(res_actual_val,
                                 np.squeeze(res_desired),
                                 atol=1e-5,
                                 rtol=1e-5)
Example 7
 def testTranspose(self):
     # Transpose a batch of TT-matrices.
     tt = initializers.random_matrix_batch(((2, 3, 4), (2, 2, 2)),
                                           batch_size=2,
                                           dtype=self.dtype)
     res_actual = ops.full(ops.transpose(tt))
     res_actual_val, tt_val = self.evaluate([res_actual, ops.full(tt)])
     self.assertAllClose(tt_val.transpose((0, 2, 1)), res_actual_val)
Example 8
 def testTranspose(self):
     # Transpose a batch of TT-matrices.
     with self.test_session() as sess:
         tt = initializers.random_matrix_batch(((2, 3, 4), (2, 2, 2)),
                                               batch_size=2)
         res_actual = ops.full(ops.transpose(tt))
         res_actual_val, tt_val = sess.run([res_actual, ops.full(tt)])
         self.assertAllClose(tt_val.transpose((0, 2, 1)), res_actual_val)
Example 9
 def testTranspose(self):
     # Transpose a TT-matrix.
     shape_list = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2)))
     rank_list = (1, 2)
     with self.test_session() as sess:
         for tensor_shape in shape_list:
             for rank in rank_list:
                 tt = initializers.random_matrix(tensor_shape, tt_rank=rank)
                 res_actual = ops.full(ops.transpose(tt))
                 res_actual_val, tt_val = sess.run(
                     [res_actual, ops.full(tt)])
                 self.assertAllClose(tt_val.transpose(), res_actual_val)
Example 10
 def testTranspose(self):
     # Transpose a TT-matrix.
     shape_list = (((2, 2), (3, 4)), ((2, 3, 4), (2, 2, 2)))
     rank_list = (1, 2)
     for tensor_shape in shape_list:
         for rank in rank_list:
             tt = initializers.random_matrix(tensor_shape,
                                             tt_rank=rank,
                                             dtype=self.dtype)
             res_actual = ops.full(ops.transpose(tt))
             res_actual_val, tt_val = self.evaluate(
                 [res_actual, ops.full(tt)])
             self.assertAllClose(tt_val.transpose(), res_actual_val)
Example 11
    def _predict_process_values(self, x, with_variance=False, test=False):
        w = self.inputs.interpolate_on_batch(self.cov.project(x, test=test))

        mean = batch_ops.pairwise_flat_inner(w, self.mus)
        if not with_variance:
            return mean
        K_mms = self._K_mms()

        sigma_ls = _kron_tril(self.sigma_ls)
        sigmas = ops.tt_tt_matmul(sigma_ls, ops.transpose(sigma_ls))
        variances = pairwise_quadratic_form(sigmas, w, w)
        variances -= pairwise_quadratic_form(K_mms, w, w)
        variances += self.cov.cov_0()[None, :]
        return mean, variances
Example 12
 def _get_mus(self, ranks, x_init, y_init):
     w = self.inputs.interpolate_on_batch(self.cov.project(x_init))
     Sigma = ops.tt_tt_matmul(self.sigma_ls[0], ops.transpose(self.sigma_ls[0]))
     temp = ops.tt_tt_matmul(w, y_init)
     anc = ops.tt_tt_matmul(Sigma, temp)
     res = TensorTrain([core[0, :, :, :, :] for core in anc.tt_cores],
                       tt_ranks=[1] * (anc.ndims() + 1))
     for i in range(1, anc.get_shape()[0]):
         elem = TensorTrain([core[i, :, :, :, :] for core in anc.tt_cores],
                 tt_ranks=[1]*(anc.ndims()+1))
         res = ops.add(res, elem)
     mu_ranks = [1] + [ranks] * (res.ndims() - 1) + [1]
     mu_cores = []
     for core in res.tt_cores:
         mu_cores.append(tf.tile(core[None, ...], [self.n_class, 1, 1, 1, 1]))
     return t3f.get_variable('tt_mus', 
         initializer=TensorTrainBatch(mu_cores, res.get_raw_shape(), mu_ranks))
Example 13
    def complexity_penalty(self):
        """Returns the complexity penalty term for ELBO. 
        """
        mus = self.mus
        sigma_ls = _kron_tril(self.sigma_ls)
        sigmas = ops.tt_tt_matmul(sigma_ls, ops.transpose(sigma_ls))
        sigmas_logdet = _kron_logdet(sigma_ls)

        K_mms = self._K_mms()
        K_mms_inv = kron.inv(K_mms)
        K_mms_logdet = kron.slog_determinant(K_mms)[1]

        penalty = 0
        penalty += - K_mms_logdet
        penalty += sigmas_logdet
        penalty += - ops.tt_tt_flat_inner(sigmas, K_mms_inv)
        penalty += - ops.tt_tt_flat_inner(mus, 
                               ops.tt_tt_matmul(K_mms_inv, mus))
        return penalty / 2
Example 14
  def complexity_penalty(self):
    """Returns the complexity penalty term for ELBO of different GP models. 
    """
    mu = self.mu
    sigma_l = _kron_tril(self.sigma_l)
    sigma = ops.tt_tt_matmul(sigma_l, ops.transpose(sigma_l))
    sigma_logdet = _kron_logdet(sigma_l)

    K_mm = self.K_mm()
    K_mm_inv = kron.inv(K_mm)
    K_mm_logdet = kron.slog_determinant(K_mm)[1]

    elbo = 0
    elbo += - K_mm_logdet
    elbo += sigma_logdet
    elbo += - ops.tt_tt_flat_inner(sigma, K_mm_inv)
    elbo += - ops.tt_tt_flat_inner(mu, 
                           ops.tt_tt_matmul(K_mm_inv, mu))
    return elbo / 2
Example 15
  def predict_process_value(self, x, with_variance=False):
    """Predicts the value of the process at point x.

    Args:
      x: data features
      with_variance: if True, also returns the process variance at x
    """
    mu = self.mu
    w = self.inputs.interpolate_on_batch(self.cov.project(x))

    mean = ops.tt_tt_flat_inner(w, mu)
    if not with_variance:
      return mean
    K_mm = self.K_mm()
    variance = self.cov.cov_0() 
    sigma_l_w = ops.tt_tt_matmul(ops.transpose(self.sigma_l), w)
    variance += ops.tt_tt_flat_inner(sigma_l_w, sigma_l_w)
    variance -= ops.tt_tt_flat_inner(w, ops.tt_tt_matmul(K_mm, w))
    return mean, variance
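In inducing-point notation (our reading of the code, not a formula quoted from the source), with interpolation vector w, \Sigma = \sigma_L \sigma_L^\top (\sigma_L being self.sigma_l) and k_0 = self.cov.cov_0(), the method computes

  \text{mean} = w^\top \mu, \qquad \text{var} = k_0 + w^\top \Sigma w - w^\top K_{mm} w,

where w^\top \Sigma w is evaluated as \|\sigma_L^\top w\|^2 via sigma_l_w, avoiding the explicit formation of \Sigma.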
Example 16
  def testGradients(self):
    w = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    A = initializers.random_matrix(([5] * 3, [5] * 3), dtype=self.dtype)
    x = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)

    def func1(x):
      return 0.5 * ops.flat_inner(x, w) ** 2
    desired1 = ops.full(riemannian.project(w, x) * ops.flat_inner(x, w))

    self._TestSingleGradient(func1, x, desired1)

    def func2(x):
      return ops.bilinear_form(A, x, x)
    grad = ops.matmul(ops.transpose(A) + A, x)
    desired2 = ops.full(riemannian.project(grad, x))
    self._TestSingleGradient(func2, x, desired2)

    def func3(x):
      # A function which is not invariant to different representations of the
      # same tensor, i.e. it does not even have a Riemannian gradient.
      return tf.add_n([tf.reduce_sum(c) for c in x.tt_cores]) ** 2
    with self.assertRaises(tf.errors.InvalidArgumentError):
      actual3 = ops.full(autodiff.gradients(func3, x))
      self.evaluate(actual3)
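The desired values follow from standard calculus (our note, not stated in the test):

  \nabla \bigl( \tfrac{1}{2} \langle x, w \rangle^2 \bigr) = \langle x, w \rangle \, w, \qquad \nabla \langle x, A x \rangle = (A + A^\top) x,

and the Riemannian gradient is the projection P_x of the Euclidean gradient onto the tangent space at x, which is what desired1 and desired2 construct via riemannian.project.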
Example 17
  def elbo(self, w, y):
    '''Evidence lower bound.
    
    Args:
      w: interpolation vector for the current batch.
      y: target values for the current batch.
    '''
      
    l = tf.cast(tf.shape(y)[0], tf.float64) # batch size
    N = tf.cast(self.N, dtype=tf.float64) 

    y = tf.reshape(y, [-1])
    
    mu = self.gp.mu
    sigma_l = _kron_tril(self.gp.sigma_l)
    sigma = ops.tt_tt_matmul(sigma_l, ops.transpose(sigma_l))
    
    sigma_n = self.gp.cov.noise_variance()
    
    K_mm = self.gp.K_mm()

    tilde_K_ii = l * self.gp.cov.cov_0()
    tilde_K_ii -= tf.reduce_sum(ops.tt_tt_flat_inner(w, 
                                         ops.tt_tt_matmul(K_mm, w)))

    elbo = 0
    elbo -= tf.reduce_sum(tf.square(y - ops.tt_tt_flat_inner(w, mu)))
    elbo -= tilde_K_ii 
    # TODO: wtf?
#    elbo -= ops.tt_tt_flat_inner(w, ops.tt_tt_matmul(sigma, w))
    elbo -= tf.reduce_sum(ops.tt_tt_flat_inner(w, ops.tt_tt_matmul(sigma, w)))
    elbo /= 2 * sigma_n**2 * l
    elbo += self.gp.complexity_penalty() / N
    # TODO: wtf?
#    elbo -=  tf.log(tf.abs(sigma_n))  
    return -elbo[0]
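A reading of the method in stochastic variational GP notation (our summary; the symbols follow the code): with batch size \ell, noise standard deviation \sigma_n and q(u) = N(\mu, \Sigma),

  \text{elbo} = -\frac{1}{2 \sigma_n^2 \ell} \Bigl( \sum_i (y_i - w_i^\top \mu)^2 + \tilde{K}_{ii} + \sum_i w_i^\top \Sigma w_i \Bigr) + \frac{1}{N} \, \text{complexity\_penalty},

where \tilde{K}_{ii} = \ell k_0 - \sum_i w_i^\top K_{mm} w_i is the summed conditional variance over the batch; the method returns -elbo so the result can be minimized directly.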
Example 18
def pairwise_flat_inner(tt_1, tt_2, matrix=None):
    """Computes all scalar products between two batches of TT-objects.

  If matrix is None, computes
    res[i, j] = t3f.flat_inner(tt_1[i], tt_2[j]).

  If matrix is present, computes
      res[i, j] = t3f.flat_inner(tt_1[i], t3f.matmul(matrix, tt_2[j]))
    or more shortly
      res[i, j] = tt_1[i]^T * matrix * tt_2[j]
    but is more efficient.

  Args:
    tt_1: TensorTrainBatch.
    tt_2: TensorTrainBatch.
    matrix: None, or TensorTrain matrix.

  Returns:
    tf.tensor with the matrix of pairwise scalar products (flat inners).
      
  Complexity:
    If the matrix is not present, the complexity is O(batch_size^2 d r^3 n)
      where d is the number of
      TT-cores (tt_vectors.ndims()), r is the largest TT-rank
        max(tt_vectors.get_tt_rank())
      and n is the size of the axis dimension, e.g.
        for a tensor of size 4 x 4 x 4, n is 4;
        for a 9 x 64 matrix of raw shape (3, 3, 3) x (4, 4, 4) n is 12
      A more precise complexity is
        O(batch_size^2 d r1 r2 n max(r1, r2))
      where r1 is the largest TT-rank of tt_a
      and r2 is the largest TT-rank of tt_b.
    If the matrix is present, the complexity is
        O(batch_size^2 d R r1 r2 (n r1 + n m R + m r2))
      where
      the matrix is of raw-shape (n, n, ..., n) x (m, m, ..., m) and TT-rank R;
      tt_1 is of shape (n, n, ..., n) and is of the TT-rank r1;
      tt_2 is of shape (m, m, ..., m) and is of the TT-rank r2;
  """
    ndims = tt_1.ndims()
    if matrix is None:
        curr_core_1 = tt_1.tt_cores[0]
        curr_core_2 = tt_2.tt_cores[0]
        mode_string = 'ij' if tt_1.is_tt_matrix() else 'i'
        einsum_str = 'pa{0}b,qc{0}d->pqbd'.format(mode_string)
        res = tf.einsum(einsum_str, curr_core_1, curr_core_2)
        for core_idx in range(1, ndims):
            curr_core_1 = tt_1.tt_cores[core_idx]
            curr_core_2 = tt_2.tt_cores[core_idx]
            einsum_str = 'pqac,pa{0}b,qc{0}d->pqbd'.format(mode_string)
            res = tf.einsum(einsum_str, res, curr_core_1, curr_core_2)
    else:
        # res[i, j] = tt_1[i] ^ T * matrix * tt_2[j]
        if (not tt_1.is_tt_matrix() or not tt_2.is_tt_matrix()
                or not matrix.is_tt_matrix()):
            raise ValueError(
                'When passing three arguments to pairwise_flat_inner, '
                'the first two of them should be TT-vectors and the last '
                'should be a TT-matrix. Got %s, %s, and %s instead.' %
                (tt_1, tt_2, matrix))
        matrix_shape = matrix.get_raw_shape()
        if not tt_1.get_raw_shape()[0].is_compatible_with(matrix_shape[0]):
            raise ValueError(
                'The shape of the first argument should be compatible '
                'with the shape of the TT-matrix, that is it should be '
                'possible to do the following matmul: '
                'transpose(tt_1) * matrix. Got the first argument '
                '"%s" and matrix "%s"' % (tt_1, matrix))
        if not tt_2.get_raw_shape()[0].is_compatible_with(matrix_shape[1]):
            raise ValueError(
                'The shape of the second argument should be compatible '
                'with the shape of the TT-matrix, that is it should be '
                'possible to do the following matmul: '
                'matrix * tt_2. Got the second argument '
                '"%s" and matrix "%s"' % (tt_2, matrix))

        vectors_1_shape = tt_1.get_shape()
        if vectors_1_shape[2] == 1 and vectors_1_shape[1] != 1:
            # TODO: not very efficient, better to use different order in einsum.
            tt_1 = ops.transpose(tt_1)
        vectors_1_shape = tt_1.get_shape()
        vectors_2_shape = tt_2.get_shape()
        if vectors_2_shape[2] == 1 and vectors_2_shape[1] != 1:
            # TODO: not very efficient, better to use different order in einsum.
            tt_2 = ops.transpose(tt_2)
        vectors_2_shape = tt_2.get_shape()
        if vectors_1_shape[1] != 1:
            # TODO: make this work even when the shape is undefined at graph
            # construction time.
            raise ValueError(
                'The tt_1 argument should contain vectors (not matrices) '
                'with statically known shape.')
        if vectors_2_shape[1] != 1:
            # TODO: make this work even when the shape is undefined at graph
            # construction time.
            raise ValueError(
                'The tt_2 argument should contain vectors (not matrices) '
                'with statically known shape.')
        curr_core_1 = tt_1.tt_cores[0]
        curr_core_2 = tt_2.tt_cores[0]
        curr_matrix_core = matrix.tt_cores[0]
        # We enumerate the dummy dimension (that takes 1 value) with `k`.
        res = tf.einsum('pakib,cijd,qekjf->pqbdf', curr_core_1,
                        curr_matrix_core, curr_core_2)
        for core_idx in range(1, ndims):
            curr_core_1 = tt_1.tt_cores[core_idx]
            curr_core_2 = tt_2.tt_cores[core_idx]
            curr_matrix_core = matrix.tt_cores[core_idx]
            res = tf.einsum('pqace,pakib,cijd,qekjf->pqbdf', res, curr_core_1,
                            curr_matrix_core, curr_core_2)

    # Squeeze to make the result of size batch_size x batch_size instead of
    # batch_size x batch_size x 1 x 1.
    return tf.squeeze(res)
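A minimal usage sketch of both branches (our illustration, not from the source; it assumes this module's imports plus t3f, and the shapes, TT-ranks and batch sizes below are arbitrary):

  import t3f

  # Plain pairwise Gram matrix between two TT-tensor batches:
  # gram[i, j] = <a[i], b[j]>.
  a = t3f.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=4)
  b = t3f.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=5)
  gram = pairwise_flat_inner(a, b)  # tf.Tensor of shape 4 x 5

  # Weighted variant: gram_m[i, j] = x[i]^T * M * y[j], where M is a
  # TT-matrix and x[i], y[j] are TT-vectors (TT-matrices with a dummy
  # column mode, as produced by a `None` raw shape).
  x = t3f.random_matrix_batch(((3, 3, 3), None), batch_size=4)
  y = t3f.random_matrix_batch(((3, 3, 3), None), batch_size=5)
  M = t3f.random_matrix(((3, 3, 3), (3, 3, 3)), tt_rank=2)
  gram_m = pairwise_flat_inner(x, y, matrix=M)  # tf.Tensor of shape 4 x 5

Both results are symbolic tensors; evaluate them with sess.run or self.evaluate, as in the test examples above.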