Example #1
 def testInvalidShapeAtEval(self):
   with self.test_session(use_gpu=self._use_gpu):
     v = tf.placeholder(dtype=tf.float32)
     with self.assertRaisesOpError("input must be at least 2-dim"):
       tf.matrix_set_diag(v, [v]).eval(feed_dict={v: 0.0})
     with self.assertRaisesOpError(
         r"but received input shape: \[1,1\] and diagonal shape: \[\]"):
       tf.matrix_set_diag([[v]], v).eval(feed_dict={v: 0.0})
  def testRectangular(self):
    with self.test_session(use_gpu=self._use_gpu):
      v = np.array([3.0, 4.0])
      mat = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0]])
      expected = np.array([[3.0, 1.0, 0.0], [1.0, 4.0, 1.0]])
      output = tf.matrix_set_diag(mat, v)
      self.assertEqual((2, 3), output.get_shape())
      self.assertAllEqual(expected, output.eval())

      v = np.array([3.0, 4.0])
      mat = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
      expected = np.array([[3.0, 1.0], [1.0, 4.0], [1.0, 1.0]])
      output = tf.matrix_set_diag(mat, v)
      self.assertEqual((3, 2), output.get_shape())
      self.assertAllEqual(expected, output.eval())
 def _covariance(self):
   p = self.probs * tf.ones_like(
       self.total_count)[..., tf.newaxis]
   return tf.matrix_set_diag(
       -tf.matmul(self._mean_val[..., tf.newaxis],
                  p[..., tf.newaxis, :]),  # outer product
       self._variance())
def random_tril_matrix(
    shape, dtype, force_well_conditioned=False, remove_upper=True):
  """[batch] lower triangular matrix.

  Args:
    shape:  `TensorShape` or Python `list`.  Shape of the returned matrix.
    dtype:  `TensorFlow` `dtype` or Python dtype
    force_well_conditioned:  Python `bool`. If `True`, returned matrix will have
      eigenvalues with modulus in `(1, 2)`.  Otherwise, eigenvalues are unit
      normal random variables.
    remove_upper:  Python `bool`.
      If `True`, zero out the strictly upper triangle.
      If `False`, the lower triangle of the returned matrix will have the desired
      properties, but the strictly upper triangle will not be zeroed out.

  Returns:
    `Tensor` with desired shape and dtype.
  """
  with tf.name_scope("random_tril_matrix"):
    # Totally random matrix.  Has no nice properties.
    tril = random_normal(shape, dtype=dtype)
    if remove_upper:
      tril = tf.matrix_band_part(tril, -1, 0)

    # Create a diagonal with entries having modulus in [1, 2].
    if force_well_conditioned:
      maxval = tf.convert_to_tensor(np.sqrt(2.), dtype=dtype.real_dtype)
      diag = random_sign_uniform(
          shape[:-1], dtype=dtype, minval=1., maxval=maxval)
      tril = tf.matrix_set_diag(tril, diag)

    return tril
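As a sanity check of the construction above, here is a minimal NumPy sketch of the same idea for a single real-valued matrix (the helper name and fixed seed are illustrative, not from the source): the strict lower triangle is standard normal and, in the well-conditioned case, the diagonal is a random sign times a uniform draw in [1, sqrt(2)].

import numpy as np

def random_tril_matrix_np(n, force_well_conditioned=False, seed=0):
    # NumPy sketch: [n, n] lower-triangular matrix, optionally well conditioned.
    rng = np.random.RandomState(seed)
    tril = np.tril(rng.randn(n, n))  # zero out the strict upper triangle
    if force_well_conditioned:
        sign = rng.choice([-1.0, 1.0], size=n)
        diag = sign * rng.uniform(1.0, np.sqrt(2.0), size=n)
        tril[np.arange(n), np.arange(n)] = diag  # the matrix_set_diag step
    return tril

print(np.abs(np.diag(random_tril_matrix_np(4, force_well_conditioned=True))))  # entries in [1, sqrt(2)]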
Example #5
def CombineArcAndRootPotentials(arcs, roots):
  """Combines arc and root potentials into a single set of potentials.

  Args:
    arcs: [B,N,N] tensor of batched arc potentials.
    roots: [B,N] matrix of batched root potentials.

  Returns:
    [B,N,N] tensor P of combined potentials where
      P_{b,s,t} = s == t ? roots[b,t] : arcs[b,s,t]
  """
  # All arguments must have statically-known rank.
  check.Eq(arcs.get_shape().ndims, 3, 'arcs must be rank 3')
  check.Eq(roots.get_shape().ndims, 2, 'roots must be a matrix')

  # All arguments must share the same type.
  dtype = arcs.dtype.base_dtype
  check.Same([dtype, roots.dtype.base_dtype], 'dtype mismatch')

  roots_shape = tf.shape(roots)
  arcs_shape = tf.shape(arcs)
  batch_size = roots_shape[0]
  num_tokens = roots_shape[1]
  with tf.control_dependencies([
      tf.assert_equal(batch_size, arcs_shape[0]),
      tf.assert_equal(num_tokens, arcs_shape[1]),
      tf.assert_equal(num_tokens, arcs_shape[2])]):
    return tf.matrix_set_diag(arcs, roots)
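A small NumPy check of the P_{b,s,t} definition in the docstring may help: tf.matrix_set_diag simply overwrites the s == t entries of every [N, N] slice with the corresponding row of roots. The toy shapes below are illustrative.

import numpy as np

arcs = np.arange(2 * 3 * 3, dtype=np.float32).reshape(2, 3, 3)  # [B, N, N]
roots = np.array([[100., 101., 102.],
                  [200., 201., 202.]], dtype=np.float32)        # [B, N]

expected = arcs.copy()
for b in range(arcs.shape[0]):
    np.fill_diagonal(expected[b], roots[b])  # P[b, s, t] = roots[b, t] if s == t else arcs[b, s, t]

# `expected` is what tf.matrix_set_diag(arcs, roots) evaluates to.
print(expected[0])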
  def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder):
    shape = list(shape)
    diag_shape = shape[:-1]

    # Upper triangle will be ignored.
    # Use a diagonal that ensures this matrix is well conditioned.
    tril = tf.random_normal(shape=shape, dtype=dtype.real_dtype)
    diag = tf.random_uniform(
        shape=diag_shape, dtype=dtype.real_dtype, minval=2., maxval=3.)
    if dtype.is_complex:
      tril = tf.complex(
          tril, tf.random_normal(shape, dtype=dtype.real_dtype))
      diag = tf.complex(
          diag, tf.random_uniform(
              shape=diag_shape, dtype=dtype.real_dtype, minval=2., maxval=3.))

    tril = tf.matrix_set_diag(tril, diag)

    tril_ph = tf.placeholder(dtype=dtype)

    if use_placeholder:
      # Evaluate the tril here because (i) you cannot feed a tensor, and (ii)
      # tril is random and we want the same value used for both mat and
      # feed_dict.
      tril = tril.eval()
      operator = linalg.LinearOperatorTriL(tril_ph)
      feed_dict = {tril_ph: tril}
    else:
      operator = linalg.LinearOperatorTriL(tril)
      feed_dict = None

    mat = tf.matrix_band_part(tril, -1, 0)

    return operator, mat, feed_dict
Example #7
  def _sample_n(self, n, seed):
    batch_shape = self.batch_shape_tensor()
    event_shape = self.event_shape_tensor()
    batch_ndims = tf.shape(batch_shape)[0]

    ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
    shape = tf.concat([[n], batch_shape, event_shape], 0)
    stream = seed_stream.SeedStream(seed, salt="Wishart")

    # Complexity: O(nbk**2)
    x = tf.random_normal(
        shape=shape, mean=0., stddev=1., dtype=self.dtype, seed=stream())

    # Complexity: O(nbk)
    # This parametrization is equivalent to Chi2, i.e.,
    # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
    expanded_df = self.df * tf.ones(
        self.scale_operator.batch_shape_tensor(),
        dtype=self.df.dtype.base_dtype)

    g = tf.random_gamma(
        shape=[n],
        alpha=self._multi_gamma_sequence(0.5 * expanded_df, self.dimension),
        beta=0.5,
        dtype=self.dtype,
        seed=stream())

    # Complexity: O(nbk**2)
    x = tf.matrix_band_part(x, -1, 0)  # Tri-lower.

    # Complexity: O(nbk)
    x = tf.matrix_set_diag(x, tf.sqrt(g))

    # Make batch-op ready.
    # Complexity: O(nbk**2)
    perm = tf.concat([tf.range(1, ndims), [0]], 0)
    x = tf.transpose(x, perm)
    shape = tf.concat([batch_shape, [event_shape[0]], [-1]], 0)
    x = tf.reshape(x, shape)

    # Complexity: O(nbM) where M is the complexity of the operator solving a
    # vector system. For LinearOperatorLowerTriangular, each matmul is O(k^3) so
    # this step has complexity O(nbk^3).
    x = self.scale_operator.matmul(x)

    # Undo make batch-op ready.
    # Complexity: O(nbk**2)
    shape = tf.concat([batch_shape, event_shape, [n]], 0)
    x = tf.reshape(x, shape)
    perm = tf.concat([[ndims - 1], tf.range(0, ndims - 1)], 0)
    x = tf.transpose(x, perm)

    if not self.input_output_cholesky:
      # Complexity: O(nbk**3)
      x = tf.matmul(x, x, adjoint_b=True)

    return x
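The sampler above is a Bartlett-style construction: the strict lower triangle is standard normal, the diagonal holds square roots of gamma (chi-squared) draws, and the result is pushed through the scale operator. A NumPy sketch of one unbatched draw, assuming a lower-triangular `scale_tril` factor (names here are illustrative, not from the source):

import numpy as np

def wishart_bartlett_sample_np(df, scale_tril, rng=np.random):
    # One draw W ~ Wishart(df, scale) with scale = scale_tril @ scale_tril.T.
    k = scale_tril.shape[0]
    x = np.tril(rng.randn(k, k), -1)                            # strict lower triangle ~ N(0, 1)
    g = rng.gamma(shape=0.5 * (df - np.arange(k)), scale=2.0)   # chi-squared(df - i) marginals
    x[np.arange(k), np.arange(k)] = np.sqrt(g)                  # the matrix_set_diag step
    a = scale_tril @ x                                          # apply the scale operator
    return a @ a.T                                              # full matrix, as in the non-Cholesky output branch

w = wishart_bartlett_sample_np(df=5.0, scale_tril=np.eye(3))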
Example #8
 def testVector(self):
   with self.test_session(use_gpu=self._use_gpu):
     v = np.array([1.0, 2.0, 3.0])
     mat = np.array([[0.0, 1.0, 0.0],
                     [1.0, 0.0, 1.0],
                     [1.0, 1.0, 1.0]])
     mat_set_diag = np.array([[1.0, 1.0, 0.0],
                              [1.0, 2.0, 1.0],
                              [1.0, 1.0, 3.0]])
     output = tf.matrix_set_diag(mat, v)
     self.assertEqual((3, 3), output.get_shape())
     self.assertAllEqual(mat_set_diag, output.eval())
def sample(means,
           logvars,
           latent_dim,
           iaf=True,
           kl_min=None,
           anneal=False,
           kl_rate=None,
           dtype=tf.float32):
  """Perform sampling and calculate KL divergence.

  Args:
    means: tensor of shape (batch_size, latent_dim)
    logvars: tensor of shape (batch_size, latent_dim)
    latent_dim: dimension of latent space.
    iaf: perform linear IAF or not.
    kl_min: lower bound for KL divergence.
    anneal: perform KL cost annealing or not.
    kl_rate: KL divergence is multiplied by kl_rate if anneal is set to True.
  Returns:
    latent_vector: latent variable after sampling. A vector of shape (batch_size, latent_dim).
    kl_obj: objective to be minimized for the KL term.
    kl_cost: real KL divergence.
  """
  if iaf:
    with tf.variable_scope('iaf'):
      prior = DiagonalGaussian(tf.zeros_like(means, dtype=dtype),
              tf.zeros_like(logvars, dtype=dtype))
      posterior = DiagonalGaussian(means, logvars)
      z = posterior.sample

      logqs = posterior.logps(z)
      L = tf.get_variable("inverse_cholesky", [latent_dim, latent_dim], dtype=dtype, initializer=tf.zeros_initializer)
      diag_one = tf.ones([latent_dim], dtype=dtype)
      L = tf.matrix_set_diag(L, diag_one)
      mask = np.tril(np.ones([latent_dim,latent_dim]))
      L = L * mask
      latent_vector = tf.matmul(z, L)
      logps = prior.logps(latent_vector)
      kl_cost = logqs - logps
  else:
    noise = tf.random_normal(tf.shape(means))
    latent_vector = means + tf.exp(0.5 * logvars) * noise
    kl_cost = -0.5 * (logvars - tf.square(means) -
        tf.exp(logvars) + 1.0)
  kl_ave = tf.reduce_mean(kl_cost, [0]) #mean of kl_cost over batches
  kl_obj = kl_cost = tf.reduce_sum(kl_ave)
  if kl_min:
    kl_obj = tf.reduce_sum(tf.maximum(kl_ave, kl_min))
  if anneal:
    kl_obj = kl_obj * kl_rate

  return latent_vector, kl_obj, kl_cost #both kl_obj and kl_cost are scalar
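In the linear-IAF branch above, L is forced to be unit lower triangular (ones on the diagonal via tf.matrix_set_diag, zeros above via the mask), so the map z -> zL has Jacobian determinant 1 and log q(z) needs no correction term. A tiny NumPy sketch of just that masking step, with a generic weight matrix standing in for the `inverse_cholesky` variable:

import numpy as np

latent_dim = 4
W = np.random.randn(latent_dim, latent_dim)        # stands in for the trained variable

L = W.copy()
np.fill_diagonal(L, 1.0)                           # matrix_set_diag(L, ones)
L *= np.tril(np.ones((latent_dim, latent_dim)))    # zero the strict upper triangle

assert np.isclose(np.linalg.det(L), 1.0)           # unit Jacobian => no log-det correction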
Example #10
 def testGrad(self):
   shapes = ((3, 4, 4), (7, 4, 8, 8))
   with self.test_session(use_gpu=self._use_gpu):
     for shape in shapes:
       x = tf.constant(np.random.rand(*shape), dtype=tf.float32)
       x_diag = tf.constant(np.random.rand(*shape[:-1]), dtype=tf.float32)
       y = tf.matrix_set_diag(x, x_diag)
       error_x = tf.test.compute_gradient_error(x, x.get_shape().as_list(),
                                                y, y.get_shape().as_list())
       self.assertLess(error_x, 1e-4)
       error_x_diag = tf.test.compute_gradient_error(
           x_diag, x_diag.get_shape().as_list(),
           y, y.get_shape().as_list())
       self.assertLess(error_x_diag, 1e-4)
Example #11
 def testGradWithNoShapeInformation(self):
   with self.test_session(use_gpu=self._use_gpu) as sess:
     v = tf.placeholder(dtype=tf.float32)
     mat = tf.placeholder(dtype=tf.float32)
     grad_input = tf.placeholder(dtype=tf.float32)
     output = tf.matrix_set_diag(mat, v)
     grads = tf.gradients(output, [mat, v], grad_ys=grad_input)
     grad_input_val = np.random.rand(3, 3).astype(np.float32)
     grad_vals = sess.run(
         grads, feed_dict={v: 2 * np.ones(3), mat: np.ones((3, 3)),
                           grad_input: grad_input_val})
     self.assertAllEqual(np.diag(grad_input_val), grad_vals[1])
     self.assertAllEqual(grad_input_val - np.diag(np.diag(grad_input_val)),
                         grad_vals[0])
Example #12
 def fit(self, x=None, y=None):
   # p(coeffs | x, y) = Normal(coeffs |
   #   mean = (1/noise_variance) (1/noise_variance x^T x + I)^{-1} x^T y,
   #   covariance = (1/noise_variance x^T x + I)^{-1})
   # TODO(trandustin): We newly fit the data at each call. Extend to do
   # Bayesian updating.
   kernel_matrix = tf.matmul(x, x, transpose_a=True) / self.noise_variance
   coeffs_precision = tf.matrix_set_diag(
       kernel_matrix, tf.matrix_diag_part(kernel_matrix) + 1.)
   coeffs_precision_tril = tf.linalg.cholesky(coeffs_precision)
   self.coeffs_precision_tril_op = tf.linalg.LinearOperatorLowerTriangular(
       coeffs_precision_tril)
   self.coeffs_mean = self.coeffs_precision_tril_op.solvevec(
       self.coeffs_precision_tril_op.solvevec(tf.einsum('nm,n->m', x, y)),
       adjoint=True) / self.noise_variance
   # TODO(trandustin): To be fully Keras-compatible, return History object.
   return
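The nested solvevec calls above apply (L L^T)^{-1} x^T y using the Cholesky factor L of the precision matrix instead of forming an explicit inverse. A NumPy sketch of the same posterior-mean computation, with an illustrative `noise_variance` of 1.0:

import numpy as np

noise_variance = 1.0
x = np.random.randn(20, 3)
y = np.random.randn(20)

precision = x.T @ x / noise_variance + np.eye(3)   # matrix_set_diag(kernel, diag + 1)
chol = np.linalg.cholesky(precision)

tmp = np.linalg.solve(chol, x.T @ y)               # forward solve with L
coeffs_mean = np.linalg.solve(chol.T, tmp) / noise_variance   # adjoint solve with L^T

assert np.allclose(coeffs_mean,
                   np.linalg.solve(precision, x.T @ y) / noise_variance)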
def future_lookup_prevention_mask(keys_len,
                                  query_len,
                                  prevail_val=1.0,
                                  cancel_val=0.0,
                                  include_diagonal=False):
    ones = tf.ones(keys_len * (keys_len + 1) // 2)
    zero_ones_mask = tf.ones(
        (keys_len, keys_len)) - tf.contrib.distributions.fill_triangular(
            ones, upper=True)
    prevail_mask = zero_ones_mask * prevail_val
    cancel_mask = tf.contrib.distributions.fill_triangular(
        ones * tf.constant(cancel_val, dtype=tf.float32), upper=True)
    mask = prevail_mask + cancel_mask
    if include_diagonal:
        mask = tf.matrix_set_diag(
            mask,
            tf.ones(keys_len, dtype=tf.float32) * prevail_val)
    return mask[(-query_len):, :]
  def testRectangularBatch(self):
    with self.test_session(use_gpu=self._use_gpu):
      v_batch = np.array([[-1.0, -2.0],
                          [-4.0, -5.0]])
      mat_batch = np.array(
          [[[1.0, 0.0, 3.0],
            [0.0, 2.0, 0.0]],
           [[4.0, 0.0, 4.0],
            [0.0, 5.0, 0.0]]])

      mat_set_diag_batch = np.array(
          [[[-1.0, 0.0, 3.0],
            [0.0, -2.0, 0.0]],
           [[-4.0, 0.0, 4.0],
            [0.0, -5.0, 0.0]]])
      output = tf.matrix_set_diag(mat_batch, v_batch)
      self.assertEqual((2, 2, 3), output.get_shape())
      self.assertAllEqual(mat_set_diag_batch, output.eval())
Example #17
def KLdivergence(P, Y, low_dim=2):
    dtype = P.dtype
    with tf.Session():
        alpha = low_dim - 1.
        sum_Y = tf.reduce_sum(tf.square(Y), 1)
        eps = tf.Variable(10e-15, dtype=dtype, name="eps").initialized_value()
        Q = tf.reshape(sum_Y, [-1, 1]) + -2 * tf.matmul(Y, tf.transpose(Y))
        Q = sum_Y + Q / alpha
        Q = tf.pow(1 + Q, -(alpha + 1) / 2)
        #Q = Q * (1 - tf.diag(tf.ones(self.batch_size, dtype=dtype)))
        Q_d = tf.diag_part(Q)
        Q_d = Q_d - Q_d
        Q = tf.matrix_set_diag(Q, Q_d)
        Q = Q / tf.reduce_sum(Q)
        Q = tf.maximum(Q, eps)
        C = tf.log((P + eps) / (Q + eps))
        C = tf.reduce_sum(P * C)
        return C
def kl_loss(y_true, y_pred, alpha=1.0, batch_size=None, num_perplexities=None, _eps=DEFAULT_EPS):
    """ Kullback-Leibler Loss function (Tensorflow)
    between the "true" output and the "predicted" output
    Parameters
    ----------
    y_true : 2d array_like (N, N*P)
        Should be the P matrix calculated from input data.
        Differences in input points using a Gaussian probability distribution
        Different P (perplexity) values stacked along dimension 1
    y_pred : 2d array_like (N, output_dims)
        Output of the neural network. We will calculate
        the Q matrix based on this output
    alpha : float, optional
        Parameter used to calculate Q. Default 1.0
    batch_size : int, required
        Number of samples per batch. y_true.shape[0]
    num_perplexities : int, required
        Number of perplexities stacked along axis 1
    Returns
    -------
    kl_loss : tf.Tensor, scalar value
        Kullback-Leibler divergence P_ || Q_

    """
    P_ = y_true
    Q_ = _make_Q(y_pred, alpha, batch_size)

    _tf_eps = tf.constant(_eps, dtype=P_.dtype)

    kls_per_beta = []
    components = tf.split(P_, num_perplexities, axis=1, name='split_perp')
    for cur_beta_P in components:
        #yrange = tf.range(zz*batch_size, (zz+1)*batch_size)
        #cur_beta_P = tf.slice(P_, [zz*batch_size, [-1, batch_size])
        #cur_beta_P = P_
        kl_matr = tf.multiply(cur_beta_P, tf.log(cur_beta_P + _tf_eps) - tf.log(Q_ + _tf_eps), name='kl_matr')
        toset = tf.constant(0, shape=[batch_size], dtype=kl_matr.dtype)
        kl_matr_keep = tf.matrix_set_diag(kl_matr, toset)
        kl_total_cost_cur_beta = tf.reduce_sum(kl_matr_keep)
        kls_per_beta.append(kl_total_cost_cur_beta)
    kl_total_cost = tf.add_n(kls_per_beta)
    #kl_total_cost = kl_total_cost_cur_beta

    return kl_total_cost
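Each per-perplexity term above is a standard KL(P || Q) with the diagonal zeroed out, since a point is not its own neighbor. A NumPy sketch of one such term, assuming P and Q are already [N, N] probability matrices (the helper name is illustrative):

import numpy as np

def kl_term_np(P, Q, eps=1e-12):
    # KL(P || Q) with the diagonal excluded, mirroring the matrix_set_diag step above.
    kl_matr = P * (np.log(P + eps) - np.log(Q + eps))
    np.fill_diagonal(kl_matr, 0.0)
    return kl_matr.sum()

print(kl_term_np(np.full((4, 4), 1 / 16.), np.full((4, 4), 1 / 16.)))  # == 0 when P equals Q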
Example #20
def full_mvn_loss(truth, h):
    """
    Takes the output of a neural network after its last activation and performs an affine transform.
    It returns the Mahalanobis distances between the targets and the result of the affine
    transformation, according to a parametrized Normal distribution. The log of the determinant of the parametrized
    covariance matrix is meant to be minimized to avoid a trivial optimization.

    :param truth: Actual datapoints to compare against learned distribution
    :param h: output of neural network (after last non-linear transform)
    :return: (tf.Tensor[MB X D], tf.Tensor[MB X 1]) Loss matrix, log_of_determinants of covariance matrices.
    """
    fan_in = h.get_shape().as_list()[1]
    dimension = truth.get_shape().as_list()[1]
    U = 100 * tf.Variable(
        tf.truncated_normal(
            [fan_in, dimension + dimension**2], dtype=tf.float32, name='U'))
    b = tf.Variable(tf.zeros([dimension + dimension**2]))
    y = tf.matmul(h, U) + b
    mu = tf.slice(y, [0, 0], [-1, dimension])  # is MB x dimension
    # is MB x dimension^2 # WARNING WARNING TODO FIX THIS MAGIC NUMBER
    var = tf.slice(y, [0, dimension], [-1, -1]) * 0.0001
    # make it a MB x D x D tensor (var is a superset of the lower triangular
    # part of a Cholesky decomp)
    var = tf.reshape(var, [-1, dimension, dimension])
    var_diag = tf.exp(tf.matrix_diag_part(var)) + \
        1  # WARNING: FIX THIS MAGIC NUMBER
    var = tf.matrix_set_diag(var, var_diag)
    var = tf.matrix_band_part(var, -1, 0)
    z = tf.squeeze(
        tf.matrix_triangular_solve(var,
                                   tf.reshape(truth - mu, [-1, dimension, 1]),
                                   lower=True,
                                   adjoint=False))  # z should be MB x D
    # take row-wise inner products of z, leaving MB x 1 vector
    inner_prods = tf.reduce_sum(tf.square(z), 1)
    # diag_part converts MB x D x D to MB x D, square and log preserve, then
    # sum makes MB x 1
    logdet = tf.reduce_sum(tf.log(tf.square(tf.matrix_diag_part(var))), 1)
    # is MB x 1 ... hard to track of individual features' contributions due to
    # correlations
    loss_column = inner_prods
    tf.add_to_collection('full', var_diag)
    tf.add_to_collection('full', var)
    return tf.reshape(loss_column, [-1, 1]), tf.reshape(logdet, [-1, 1])
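The matrix_set_diag / matrix_band_part pair above turns the raw [D, D] block into a valid Cholesky factor: the diagonal is exponentiated (and shifted by 1) so it is strictly positive, and the upper triangle is dropped. A NumPy sketch of that parameterization for a single sample:

import numpy as np

dimension = 3
raw = np.random.randn(dimension, dimension)          # unconstrained network output

chol = raw.copy()
np.fill_diagonal(chol, np.exp(np.diag(raw)) + 1.0)   # positive diagonal (matrix_set_diag)
chol = np.tril(chol)                                 # lower triangle only (matrix_band_part)

cov = chol @ chol.T
assert np.all(np.linalg.eigvalsh(cov) > 0)           # a valid covariance matrix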
Example #21
def get_scores(thought_vectors, dropout_rate):
    def use_dropout():
        a, b = thought_vectors[0], thought_vectors[1]
        dropout_mask_shape = tf.transpose(tf.shape(a))
        dropout_mask = tf.random_uniform(dropout_mask_shape) > DROPOUT_RATE
        dropout_mask = tf.where(dropout_mask,
                                tf.ones(dropout_mask_shape),
                                tf.zeros(dropout_mask_shape))
        dropout_mask *= (1/dropout_rate)
        a *= dropout_mask
        b *= dropout_mask
        return a, b
    def no_dropout():
        return thought_vectors[0], thought_vectors[1]
    a, b = tf.cond(dropout_rate > 0, use_dropout, no_dropout)

    scores = tf.matmul(a, b, transpose_b=True)
    scores = tf.matrix_set_diag(scores, tf.zeros_like(scores[0]))
    return scores
Example #22
def contrastive_loss(y_true, y_pred):
    shape = tf.shape(y_true)  # a list: [None, 9, 2]
    dim = tf.mul(shape[1], shape[2])  # dim = prod(9,2) = 18
    y_true = tf.reshape(y_true, [-1, dim])  # -1 means "all"
    y_pred = tf.reshape(y_pred, [-1, dim])  # -1 means "all"
    x2 = tf.expand_dims(tf.transpose(y_pred, [0, 1]), 1)
    y2 = tf.expand_dims(tf.transpose(y_true, [0, 1]), 0)
    diff = y2 - x2
    maximum = tf.maximum(diff, 0.0)
    tensor_pow = tf.square(maximum)
    errors = tf.reduce_sum(tensor_pow, 2)
    diagonal = tf.diag_part(errors)
    cost_s = tf.maximum(0.05 - errors + diagonal, 0.0)
    cost_im = tf.maximum(0.05 - errors + tf.reshape(diagonal, (-1, 1)), 0.0)
    cost_tot = cost_s + cost_im
    zero_diag = tf.mul(diagonal, 0.0)
    cost_tot_diag = tf.matrix_set_diag(cost_tot, zero_diag)
    tot_sum = tf.reduce_sum(cost_tot_diag)
    return tot_sum
    def get_matrix_tree(self, r, A, mask1, mask2):
        L = tf.zeros_like(A)
        L = L - A
        tmp = tf.reduce_sum(A, 1)
        L = tf.matrix_set_diag(L, tmp)

        L_dash = tf.concat([L[:, 1:, :], tf.expand_dims(r, 1)], 1)  #(B*T,S,S)
        L_dash_inv = tf.matrix_inverse(L_dash)
        proot = tf.multiply(r, L_dash_inv[:, :, 0])  ##(B*T,S,)
        pz1 = mask1 * tf.multiply(A,
                                  tf.matrix_transpose(
                                      tf.expand_dims(
                                          tf.matrix_diag_part(L_dash_inv),
                                          2)))  #(B*T,S,S)
        pz2 = mask2 * tf.multiply(A,
                                  tf.matrix_transpose(L_dash_inv))  #(B*T,S,S)
        pz = pz1 - pz2

        return proot, pz
Example #24
def _get_anchor_positive_triplet_mask(labels):
    """Return a 2D mask where mask[a, p] is True iff a and p are distinct and have same label.
    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    Returns:
        mask: tf.bool `Tensor` with shape [batch_size, batch_size]
    """
    with tf.name_scope("anchor_positive_mask") as scope:

        # Check if labels[i] == labels[j]
        # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
        labels_equal = tf.equal(tf.expand_dims(labels, 0),
                                tf.expand_dims(labels, 1))

        # Remove the diagonal, that is, the space where a == p
        mask = tf.matrix_set_diag(labels_equal,
                                  tf.zeros(tf.shape(labels)[0], dtype=tf.bool))

    return mask
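A quick NumPy illustration of the same mask on a toy label batch; the broadcasted equality gives labels[i] == labels[j] and the diagonal (a == p) is then removed:

import numpy as np

labels = np.array([0, 1, 0, 2])
mask = labels[None, :] == labels[:, None]   # same-label pairs
np.fill_diagonal(mask, False)               # drop a == p (the matrix_set_diag step)

print(mask.astype(int))
# [[0 0 1 0]
#  [0 0 0 0]
#  [1 0 0 0]
#  [0 0 0 0]]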
Example #25
def discriminative_instance_loss(y_true, y_pred, delta_v=0.5, delta_d=1.5, order=2, gamma=1e-3):
    """Computes the discriminative instance loss
    # Arguments:
        y_true: A tensor of the same shape as `y_pred`.
        y_pred: A tensor of the vector embedding
    """

    def temp_norm(ten, axis=-1):
        return tf.sqrt(K.epsilon() + tf.reduce_sum(tf.square(ten), axis=axis))

    channel_axis = 1 if K.image_data_format() == 'channels_first' else len(y_pred.get_shape()) - 1
    other_axes = [x for x in list(range(len(y_pred.get_shape()))) if x != channel_axis]

    # Compute variance loss
    cells_summed = tf.tensordot(y_true, y_pred, axes=[other_axes, other_axes])
    n_pixels = tf.cast(tf.count_nonzero(y_true, axis=other_axes), dtype=K.floatx()) + K.epsilon()
    n_pixels_expand = tf.expand_dims(n_pixels, axis=1) + K.epsilon()
    mu = tf.divide(cells_summed, n_pixels_expand)

    delta_v = tf.constant(delta_v, dtype=K.floatx())
    mu_tensor = tf.tensordot(y_true, mu, axes=[[channel_axis], [0]])
    L_var_1 = y_pred - mu_tensor
    L_var_2 = tf.square(tf.nn.relu(temp_norm(L_var_1, axis=channel_axis) - delta_v))
    L_var_3 = tf.tensordot(L_var_2, y_true, axes=[other_axes, other_axes])
    L_var_4 = tf.divide(L_var_3, n_pixels)
    L_var = tf.reduce_mean(L_var_4)

    # Compute distance loss
    mu_a = tf.expand_dims(mu, axis=0)
    mu_b = tf.expand_dims(mu, axis=1)

    diff_matrix = tf.subtract(mu_b, mu_a)
    L_dist_1 = temp_norm(diff_matrix, axis=channel_axis)
    L_dist_2 = tf.square(tf.nn.relu(tf.constant(2 * delta_d, dtype=K.floatx()) - L_dist_1))
    diag = tf.constant(0, dtype=K.floatx()) * tf.diag_part(L_dist_2)
    L_dist_3 = tf.matrix_set_diag(L_dist_2, diag)
    L_dist = tf.reduce_mean(L_dist_3)

    # Compute regularization loss
    L_reg = gamma * temp_norm(mu, axis=-1)
    L = L_var + L_dist + tf.reduce_mean(L_reg)

    return L
Example #26
def quadratic_regression_pd(SA, costs, diag_cost=False):
    assert not diag_cost
    global global_step
    dsa = SA.shape[-1]
    C = tf.get_variable('cost_mat{}'.format(global_step),
                        shape=[dsa, dsa],
                        dtype=tf.float32,
                        initializer=tf.random_uniform_initializer(minval=-0.1,
                                                                  maxval=0.1))
    L = tf.matrix_band_part(C, -1, 0)
    L = tf.matrix_set_diag(L, tf.maximum(tf.matrix_diag_part(L), 0.0))
    LL = tf.matmul(L, tf.transpose(L))
    c = tf.get_variable('cost_vec{}'.format(global_step),
                        shape=[dsa],
                        dtype=tf.float32,
                        initializer=tf.zeros_initializer())
    b = tf.get_variable('cost_bias{}'.format(global_step),
                        shape=[],
                        dtype=tf.float32,
                        initializer=tf.zeros_initializer())
    s_ = tf.placeholder(tf.float32, [None, dsa])
    c_ = tf.placeholder(tf.float32, [None])
    pred_cost = 0.5 * tf.einsum('na,ab,nb->n', s_, LL, s_) + \
            tf.einsum('na,a->n', s_, c) + b
    mse = tf.reduce_mean(tf.square(pred_cost - c_))
    opt = tf.train.MomentumOptimizer(1e-3, 0.9).minimize(mse)
    N = SA.shape[0]
    SA = SA.reshape([-1, dsa])
    costs = costs.reshape([-1])
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for itr in tqdm.trange(1000, desc='Fitting cost'):
            _, m = sess.run([opt, mse], feed_dict={
                s_: SA,
                c_: costs,
            })
            if itr == 0 or itr == 999:
                print('mse itr {}: {}'.format(itr, m))
        cost_mat, cost_vec = sess.run((LL, c))

    global_step += 1
    return cost_mat, cost_vec
Example #27
 def deep_linear(x, full_cov=True):
     # x: [batch_size, x_dim]
     h = x
     for n_units in layer_sizes:
         # h: [batch_size, n_units]
         h = tf.layers.dense(h, n_units, activation=activation)
     # w_mean: [n_units]
     n_units = layer_sizes[-1]
     w_mean = tf.get_variable("w_mean",
                              shape=[n_units],
                              dtype=tf.float64,
                              initializer=tf.truncated_normal_initializer(
                                  stddev=0.001, dtype=tf.float64))
     w_cov_raw = tf.get_variable("w_cov",
                                 dtype=tf.float64,
                                 initializer=tf.eye(n_units,
                                                    dtype=tf.float64))
     w_cov_tril = tf.matrix_set_diag(
         tf.matrix_band_part(w_cov_raw, -1, 0),
         tf.nn.softplus(tf.matrix_diag_part(w_cov_raw)))
     # f_mean: [batch_size]
     f_mean = tf.squeeze(tf.matmul(h, w_mean[:, None]), -1)
     # f_cov: [batch_size, batch_size]
     f_cov_half = tf.matmul(h, w_cov_tril)
     if full_cov:
         f_cov = tf.matmul(f_cov_half, f_cov_half, transpose_b=True)
         f_cov = f_cov + tf.eye(tf.shape(f_cov)[0], dtype=tf.float64) * \
             gpflow.settings.jitter
         if mvn:
             f_cov_tril = tf.cholesky(f_cov)
             f_dist = zs.distributions.MultivariateNormalCholesky(
                 f_mean, f_cov_tril)
             return f_dist
         else:
             return f_mean, f_cov
     else:
         # hw_cov: [batch_size, n_units]
         hw_cov = tf.matmul(f_cov_half, w_cov_tril, transpose_b=True)
         # f_cov_diag: [batch_size]
         f_var = tf.reduce_sum(hw_cov * h, axis=-1)
         f_var += gpflow.settings.jitter
         return f_mean, f_var
Example #28
    def test_gamma_gaussian_equivalent(self):
        # Check that the Cholesky-Wishart distribution with the sparsity correction factor is equivalent to a
        # SquareRootGamma-Gaussian distribution after removing the log probability of the zero terms in the off diagonal
        sqrt_gamma_gaussian = SqrtGammaGaussian(
            df=self.sqrt_w.df, log_diag_scale=self.sqrt_w.log_diag_scale)
        x_with_log_diag = tf.matrix_set_diag(
            self.x, self.x_cov_obj.log_diag_chol_precision)
        log_prob1_gamma = sqrt_gamma_gaussian._log_prob_sqrt_gamma(
            x_with_log_diag)

        log_prob1_normal = sqrt_gamma_gaussian.normal_dist.log_prob(self.x)
        off_diag_mask = self.x_cov_obj.np_off_diag_mask()
        log_prob1_normal = tf.reduce_sum(log_prob1_normal * off_diag_mask,
                                         axis=[1, 2])

        log_prob_gg = log_prob1_gamma + log_prob1_normal

        log_prob_wishart = self.sqrt_w.log_prob(self.x_cov_obj)

        self._asset_allclose_tf_feed(log_prob_gg, log_prob_wishart)
Example #29
    def test_log_prob_sparse(self):
        # Test that the square root Gamma Gaussian with sparse matrices is the same as the dense version,
        # when the sparse elements are removed afterwards
        x_with_log_diag = tf.matrix_set_diag(
            self.x, self.x_cov_obj.log_diag_chol_precision)
        log_prob1_gamma = self.sqrt_gamma_gaussian_dense._log_prob_sqrt_gamma(
            x_with_log_diag)

        log_prob1_normal = self.sqrt_gamma_gaussian_dense.normal_dist.log_prob(
            self.x)
        off_diag_mask = self.x_cov_obj.np_off_diag_mask(
        )  # Zero out off-diagonal terms
        log_prob1_normal = tf.reduce_sum(log_prob1_normal * off_diag_mask,
                                         axis=[1, 2])

        log_prob1 = log_prob1_gamma + log_prob1_normal

        log_prob2 = self.sqrt_gamma_gaussian.log_prob(self.x_cov_obj)

        self._asset_allclose_tf_feed(log_prob1, log_prob2)
def _get_normed_sym_tf(X_, batch_size):
    """
    Compute the normalized and symmetrized probability matrix from
    relative probabilities X_, where X_ is a Tensorflow Tensor
    Parameters
    ----------
    X_ : 2-d Tensor (N, N)
        asymmetric probabilities. For instance, X_(i, j) = P(i|j)
    Returns
    -------
    P : 2-d Tensor (N, N)
        symmetric probabilities, making the assumption that P(i|j) = P(j|i)
        Diagonals are all 0s."""
    toset = tf.constant(0, shape=[batch_size], dtype=X_.dtype)
    X_ = tf.matrix_set_diag(X_, toset)
    norm_facs = tf.reduce_sum(X_, axis=0, keep_dims=True)
    X_ = X_ / norm_facs
    X_ = 0.5*(X_ + tf.transpose(X_))

    return X_
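An equivalent NumPy version (illustrative, not from the source) is handy for unit-testing the graph op: zero the diagonal, normalize each column, then symmetrize.

import numpy as np

def get_normed_sym_np(X):
    X = X.copy()
    np.fill_diagonal(X, 0.0)                  # matrix_set_diag(X_, zeros)
    X = X / X.sum(axis=0, keepdims=True)      # per-column normalization
    return 0.5 * (X + X.T)                    # assume P(i|j) == P(j|i)

P = get_normed_sym_np(np.random.rand(5, 5))
assert np.allclose(P, P.T) and np.allclose(np.diag(P), 0.0)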
Example #31
def get_cholesky_variable(name,
                          shape=None,
                          dtype=None,
                          initializer=None,
                          regularizer=None,
                          trainable=True,
                          collections=None,
                          caching_device=None,
                          partitioner=None,
                          validate_shape=True,
                          custom_getter=None,
                          transform=None):
    """
    Get an existing Cholesky variable or create a new one.
    """
    x = get_tril_variable(name, shape, dtype, initializer, regularizer,
                          trainable, collections, caching_device, partitioner,
                          validate_shape, custom_getter)
    transform = transform or tf.nn.softplus
    return tf.matrix_set_diag(x, transform(tf.matrix_diag_part(x)))
 def calc_min_feasible_power(self, abs_H, min_rates, noise_power):
     abs_H_2 = tf.reshape(
         tf.square(abs_H),
         [-1, self.top_config.user_num, self.top_config.user_num])
     check_mat = tf.matrix_transpose(abs_H_2)
     diag_part = tf.matrix_diag_part(abs_H_2) + 1e-10
     diag_zeros = tf.zeros(tf.shape(diag_part))
     diag_part = tf.reshape(diag_part, [-1, self.top_config.user_num, 1])
     check_mat = tf.divide(check_mat, diag_part)
     check_mat = tf.matrix_set_diag(check_mat, diag_zeros)
     min_snrs = tf.cast(
         tf.reshape(2**min_rates - 1, [self.top_config.user_num, 1]),
         tf.float32)
     check_mat = tf.multiply(check_mat, min_snrs)
     u = np.divide(min_snrs, diag_part) * noise_power
     inv_id_sub_check_mat = tf.matrix_inverse(
         tf.subtract(tf.eye(self.top_config.user_num), check_mat))
     min_feasible_power = tf.matmul(inv_id_sub_check_mat, u)
     min_feasible_power = tf.reshape(min_feasible_power,
                                     [-1, self.top_config.user_num])
     return min_feasible_power
Example #33
def linear_covariance(x_mean, x_cov, A, b):
    x_var_diag = tf.matrix_diag_part(x_cov)
    xx_mean = x_var_diag + x_mean * x_mean

    term1_diag = tf.matmul(xx_mean, A.var)

    flat_xCov = tf.reshape(x_cov, [-1, A.shape[0]])  # [b*x, x]
    xCov_A = tf.matmul(flat_xCov, A.mean)  # [b*x, y]
    xCov_A = tf.reshape(xCov_A, [-1, A.shape[0], A.shape[1]])  # [b, x, y]
    xCov_A = tf.transpose(xCov_A, [0, 2, 1])  # [b, y, x]
    xCov_A = tf.reshape(xCov_A, [-1, A.shape[0]])  # [b*y, x]
    A_xCov_A = tf.matmul(xCov_A, A.mean)  # [b*y, y]
    A_xCov_A = tf.reshape(A_xCov_A, [-1, A.shape[1], A.shape[1]])  # [b, y, y]

    term2 = A_xCov_A
    term2_diag = tf.matrix_diag_part(term2)

    term3_diag = b.var

    result_diag = term1_diag + term2_diag + term3_diag
    return tf.matrix_set_diag(term2, result_diag)
Example #34
def meanfield_nn(D, k, temp=None, exclude_self=False):
    logits = D

    if temp is not None:
        logits = logits * temp  # temp is actually treated as inverse temperature since this is numerically more stable
        print('with temp')

    if exclude_self:
        infs = tf.ones_like(logits[:, :, :, 0]) * np.inf
        # infs = tf.ones_like(logits[:,:,:,0]) * (10000.0) # setting diagonal to -inf produces numerical problems ...
        logits = tf.matrix_set_diag(logits, -infs)

    W = []
    for i in range(k):
        weights_exp = tf.nn.softmax(logits, axis=-1)
        eps = 1.2e-7
        weights_exp = tf.clip_by_value(weights_exp, eps, 1 - eps)
        W.append(weights_exp)
        logits = logits + tf.log1p(-weights_exp)

    return W
Example #35
    def call(self, x, mask, training=False):
        self.step += 1
        x_ = x
        x = dropout(x, keep_prob=self.keep_prob, training=training)

        if self.step == 0:
            if not self.identity:
                self.linear = layers.Dense(melt.get_shape(x, -1),
                                           activation=tf.nn.relu)
            else:
                self.linear = None

        # NOTICE shared linear!
        if self.linear is not None:
            x = self.linear(x)

        scores = tf.matmul(x, tf.transpose(x, [0, 2, 1]))

        #  x = tf.constant([[[1,2,3], [4,5,6],[7,8,9]],[[1,2,3],[4,5,6],[7,8,9]]], dtype=tf.float32) # shape=(2, 3, 3)
        #  z = tf.matrix_set_diag(x, tf.zeros([2, 3]))
        if not self.diag:
            # TODO better dim
            dim0 = melt.get_shape(scores, 0)
            dim1 = melt.get_shape(scores, 1)
            scores = tf.matrix_set_diag(scores, tf.zeros([dim0, dim1]))

        if mask is not None:
            JX = melt.get_shape(x, 1)
            mask = tf.tile(tf.expand_dims(mask, axis=1), [1, JX, 1])
            scores = softmax_mask(scores, mask)

        alpha = tf.nn.softmax(scores)
        self.alpha = alpha

        x = tf.matmul(alpha, x)

        if self.combine is None:
            return x
        else:
            return self.combine(x_, x, training=training)
Example #36
def LSEnet(model, Ip, u1p, u2p):
    # computation graph that defines least squared estimation of the electric field
    delta_Ep_pred = tf.cast(tf.tensordot(u1p, model.G1_real, axes=[[-1], [1]]) + tf.tensordot(u2p, model.G2_real, axes=[[-1], [1]]), tf.complex128) + \
      1j * tf.cast(tf.tensordot(u1p, model.G1_imag, axes=[[-1], [1]]) + tf.tensordot(u2p, model.G2_imag, axes=[[-1], [1]]), tf.complex128)
    delta_Ep_expand = tf.expand_dims(delta_Ep_pred, 2)
    delta_Ep_expand_diff = delta_Ep_expand[:, 1::
                                           2, :, :] - delta_Ep_expand[:, 2::
                                                                      2, :, :]
    y = tf.transpose(Ip[:, 1::2, :] - Ip[:, 2::2, :], [0, 2, 1])
    H = tf.concat(
        [2 * tf.real(delta_Ep_expand_diff), 2 * tf.imag(delta_Ep_expand_diff)],
        axis=2)
    H = tf.transpose(H, [0, 3, 1, 2])
    Ht_H = tf.matmul(tf.transpose(H, [0, 1, 3, 2]), H)
    Ht_H_inv_Ht = tf.matmul(
        tf.matrix_inverse(Ht_H + tf.eye(2, dtype=tf.float64) * 1e-12),
        tf.transpose(H, [0, 1, 3, 2]))
    x_new = tf.squeeze(tf.matmul(Ht_H_inv_Ht, tf.expand_dims(y, -1)), -1)

    n_observ = model.n_observ
    contrast_p = tf.reduce_mean(Ip, axis=2)

    d_contrast_p = tf.reduce_mean(tf.abs(delta_Ep_pred)**2, axis=2)

    Rp = tf.tensordot(
        tf.expand_dims(model.R0 + model.R1 * contrast_p + 4 *
                       (model.Q0 + model.Q1 * d_contrast_p) * contrast_p,
                       axis=-1),
        tf.ones((1, model.num_pix), dtype=tf.float64),
        axes=[[-1], [0]]) + 1e-24
    Rp = tf.transpose(Rp, [0, 2, 1])
    R_diff = Rp[:, :, 1::2] + Rp[:, :, 2::2]
    R = tf.matrix_set_diag(
        tf.concat([tf.expand_dims(tf.zeros_like(R_diff), -1)] *
                  (n_observ // 2), -1), R_diff)
    P_new = tf.matmul(tf.matmul(Ht_H_inv_Ht, R),
                      tf.transpose(Ht_H_inv_Ht, [0, 1, 3, 2]))
    Enp_pred_new = tf.cast(x_new[:, :, 0], dtype=tf.complex128) + 1j * tf.cast(
        x_new[:, :, 1], dtype=tf.complex128)
    return Enp_pred_new, P_new, H
Example #37
    def _decode_verification(self):
        with tf.variable_scope("Cross_passage_verification"):
            batch_size = tf.shape(self.start_label)[0]
            content_probs = tf.reshape(
                self.content_probs,
                [tf.shape(self.p_emb)[0],
                 tf.shape(self.p_emb)[1], 1])  # [batch * 5 , p , 1]
            ver_P = content_probs * self.p_emb
            ver_P = tf.reshape(ver_P, [
                batch_size, -1,
                tf.shape(self.p_emb)[1],
                3 * self.append_wordvec_size + self.vocab.embed_dim
            ])  #[batch , 5 , p , wordvec dimension = 3 * 1024 + 300]
            RA = tf.reduce_mean(ver_P, axis=2)  # [batch , 5 , wordvec]
            #print("RA_concated.shape = ",RA.shape)
            #Given the representation of the answer candidates from all passages {rAi }, each answer candidate then attends to other candidates to collect supportive information via attention mechanism
            #tf.batch_mat_mul()
            S = tf.matmul(RA, RA, transpose_a=False,
                          transpose_b=True)  # [batch , 5 , 5]
            S = tf.matrix_set_diag(
                input=S,
                diagonal=tf.zeros(shape=[batch_size,
                                         tf.shape(S)[1]],
                                  dtype=S.dtype)
            )  #[batch , 5 , 5] except for the main digonal of innermost matrices is all 0
            S = tf.nn.softmax(S, -1)  #[batch , 5 , 5] each row is normalized
            RA_Complementary = tf.matmul(S,
                                         RA,
                                         transpose_a=False,
                                         transpose_b=False)
            #Here ̃rAi is the collected verification information from other passages based on the attention weights. Then we pass it together with the original representation rAi to a fully connected layer
            RA_concated = tf.concat(
                [RA, RA_Complementary, RA * RA_Complementary],
                -1)  # [batch , 5 , 3 * (3 * 1024 + 300) = 10116]

            g = tc.layers.fully_connected(RA_concated,
                                          num_outputs=self.max_p_num,
                                          activation_fn=None)  #[batch , 5 ,1]
            g = tf.reshape(g, shape=[batch_size, -1])  #[batch , 5]
            self.pred_pass_prob = tf.nn.softmax(g, -1)  #[batch , 5]
    def get_initial_state(self, batch_size):
        with tf.variable_scope('initial_state', reuse=tf.AUTO_REUSE):

            R_0_params = tf.get_variable(
                name='R_0_params',
                dtype=tf.float32,
                shape=[self.memory_size, self.code_size],
                initializer=tf.random_normal_initializer(mean=0.0,
                                                         stddev=0.05),
                trainable=self.trainable_memory)
            # note we do not use a zero init for the DKM.
            # this is to allow DKM RLS algo to compute a nonzero-mean addressing weight distribution
            # when using p(M) to compute the first q(w) during writing of an episode.
            # note our models use a randomized or otherwise asymmetric init for q^(0)(M) instead of assigning it the prior's values.
            # so we can (and do) use a zero init for our models' priors.

            U_0_params = tf.get_variable(
                name='U_0_params',
                dtype=tf.float32,
                shape=[self.memory_size, self.memory_size],
                initializer=tf.zeros_initializer(),
                trainable=self.trainable_memory)

            R_0 = R_0_params

            upper_tri = tf.matrix_band_part(U_0_params, 0, -1)
            strictly_upper_tri = tf.matrix_set_diag(
                upper_tri,
                tf.zeros_like(tf.matrix_diag_part(upper_tri),
                              dtype=tf.float32))

            logdiag = tf.matrix_diag_part(U_0_params)
            U_0_diag = tf.diag(tf.exp(logdiag))
            U_0_offdiag = strictly_upper_tri + tf.transpose(strictly_upper_tri)
            U_0 = U_0_diag + U_0_offdiag

            R = tf.tile(tf.expand_dims(R_0, 0), [batch_size, 1, 1])
            U = tf.tile(tf.expand_dims(U_0, 0), [batch_size, 1, 1])
            return MemoryState(R=R, U=U)
Example #39
def get_dist_table_novariance(x, dist, symmetric, alpha):
    batch_size = get_shape(x)[0]
    P = pairwise_distance(x, x)

    if dist == 'gauss':
        P = tf.exp(-P)
    elif dist == 'tdis':
        P = tf.pow(1. + P, -1.)

    toset = tf.constant(0., shape=[batch_size], dtype=tf.float32)
    P = tf.matrix_set_diag(P, toset)

    if symmetric == True:
        m = tf.reduce_sum(P)
        P = P / m
    else:
        m = tf.reduce_sum(P, axis=1)
        m = tf.tile(tf.expand_dims(m, axis=1), [1, batch_size])
        P = tf.div(P, m)
        P = 0.5 * (P + tf.transpose(P))
        P = P / batch_size
    return P
Example #40
def _uniform_correlation_like_matrix(num_rows, batch_shape, dtype, seed):
    """Returns a uniformly random `Tensor` of "correlation-like" matrices.

  A "correlation-like" matrix is a symmetric square matrix with all entries
  between -1 and 1 (inclusive) and 1s on the main diagonal.  Of these,
  the ones that are positive semi-definite are exactly the correlation
  matrices.

  Args:
    num_rows: Python `int` dimension of the correlation-like matrices.
    batch_shape: `Tensor` or Python `tuple` of `int` shape of the
      batch to return.
    dtype: `dtype` of the `Tensor` to return.
    seed: Random seed.

  Returns:
    matrices: A `Tensor` of shape `batch_shape + [num_rows, num_rows]`
      and dtype `dtype`.  Each entry is in [-1, 1], and each matrix
      along the bottom two dimensions is symmetric and has 1s on the
      main diagonal.
  """
    num_entries = num_rows * (num_rows + 1) / 2
    ones = tf.ones(shape=[num_entries], dtype=dtype)
    # It seems wasteful to generate random values for the diagonal since
    # I am going to throw them away, but `fill_triangular` fills the
    # diagonal, so I probably need them.
    # It's not impossible that it would be more efficient to just fill
    # the whole matrix with random values instead of messing with
    # `fill_triangular`.  Then would need to filter almost half out with
    # `matrix_band_part`.
    unifs = uniform.Uniform(-ones, ones).sample(batch_shape, seed=seed)
    tril = util.fill_triangular(unifs)
    symmetric = tril + tf.matrix_transpose(tril)
    diagonal_ones = tf.ones(shape=util.pad(batch_shape,
                                           axis=0,
                                           back=True,
                                           value=num_rows),
                            dtype=dtype)
    return tf.matrix_set_diag(symmetric, diagonal_ones)
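A NumPy sketch of the same construction, plus a check of the properties promised in the docstring (symmetry, unit diagonal, entries in [-1, 1]); the helper name is illustrative:

import numpy as np

def uniform_correlation_like_np(num_rows, rng=np.random):
    tril = np.tril(rng.uniform(-1.0, 1.0, size=(num_rows, num_rows)), k=-1)
    m = tril + tril.T            # symmetric with a zero diagonal
    np.fill_diagonal(m, 1.0)     # matrix_set_diag with ones
    return m

m = uniform_correlation_like_np(4)
assert np.allclose(m, m.T)
assert np.allclose(np.diag(m), 1.0)
assert np.all(np.abs(m) <= 1.0)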
 def get_KL_logistic(X, posterior_alpha, prior_lambda_, posterior_lambda_,
                     prior_alpha):
     """
     Calculates KL divergence between two Concrete distributions using samples from posterior Concrete distribution.
     KL(Concrete(alpha, posterior_lambda_) || Concrete(prior_alpha, prior_lambda))
     Args:
         X: Tensor of shape S x N x N. These are samples from posterior Concrete distribution.
         posterior_alpha: Tensor of shape N x N. alpha for posterior distributions.
         prior_lambda_: Tensor of shape (). prior_lambda_ of prior distribution.
         posterior_lambda_: Tensor of shape (). posterior_lambda_ for posterior distribution.
         prior_alpha: Tensor of shape N x N. alpha for prior distributions.
     Returns:
         : Tensor of shape () representing KL divergence between the two concrete distributions.
     """
     logdiff = Latnet.logp_logistic(
         X, posterior_alpha, posterior_lambda_) - Latnet.logp_logistic(
             X, prior_alpha, prior_lambda_)
     logdiff = tf.matrix_set_diag(
         logdiff,
         tf.zeros((tf.shape(logdiff)[0], tf.shape(logdiff)[1]),
                  dtype=Latnet.FLOAT))  # set diagonal part to zero
     return tf.reduce_sum(tf.reduce_mean(logdiff, [0]))
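The estimator above is a Monte Carlo average over the S sample slices of log q(X) - log p(X), with the diagonal excluded because self-connections are not modeled. A NumPy sketch of that reduction alone, with random numbers standing in for the log-density difference:

import numpy as np

S, N = 8, 5
logdiff = np.random.randn(S, N, N)        # stands in for logp_logistic(q) - logp_logistic(p)

for s in range(S):
    np.fill_diagonal(logdiff[s], 0.0)     # set diagonal part to zero

kl_estimate = logdiff.mean(axis=0).sum()  # mean over samples, sum over entries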
 def _assertions(self, x):
   if not self.validate_args:
     return []
   shape = tf.shape(x)
   is_matrix = tf.assert_rank_at_least(
       x, 2, message="Input must have rank at least 2.")
   is_square = tf.assert_equal(
       shape[-2], shape[-1], message="Input must be a square matrix.")
   above_diagonal = tf.matrix_band_part(
       tf.matrix_set_diag(x, tf.zeros(shape[:-1], dtype=tf.float32)), 0, -1)
   is_lower_triangular = tf.assert_equal(
       above_diagonal,
       tf.zeros_like(above_diagonal),
       message="Input must be lower triangular.")
   # A lower triangular matrix is nonsingular iff all its diagonal entries are
   # nonzero.
   diag_part = tf.matrix_diag_part(x)
   is_nonsingular = tf.assert_none_equal(
       diag_part,
       tf.zeros_like(diag_part),
       message="Input must have all diagonal entries nonzero.")
   return [is_matrix, is_square, is_lower_triangular, is_nonsingular]
def SAMME_R_voting_strategy(logits):
    """
    Algorithm 4 of "Multi-class AdaBoost" by Zhu et al. 2006

    PDF: Can be found at the bottom of page 9
    (https://web.stanford.edu/~hastie/Papers/samme.pdf)

    Args:
      See `voting strategy`
    """
    class_num = logits[0].get_shape().as_list()[-1]
    for x in logits:
        assert x.shape == logits[0].shape

    log_probs = [tf.log(tf.nn.softmax(l)) for l in logits]
    # two steps to get a matrix of -1/class_num off the diagonal and 1 on the diagonal
    hk_inner_prod = tf.constant(
        (-1 / class_num), dtype=tf.float32, shape=(class_num, class_num))
    hk_inner_prod = tf.matrix_set_diag(hk_inner_prod, tf.ones([class_num]))
    h_ks = [(class_num - 1) * tf.matmul(lp, hk_inner_prod) for lp in log_probs]

    return tf.accumulate_n(h_ks)
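A NumPy sketch of the voting rule above (toy logits; the helper name is illustrative): build the coding matrix of -1/K off the diagonal and 1 on it via the set-diag step, project per-classifier log-probabilities through it, and sum.

import numpy as np

def samme_r_scores_np(logits_list):
    k = logits_list[0].shape[-1]
    coding = np.full((k, k), -1.0 / k)
    np.fill_diagonal(coding, 1.0)                    # the matrix_set_diag step
    total = np.zeros_like(logits_list[0])
    for logits in logits_list:
        log_probs = logits - np.log(np.exp(logits).sum(axis=-1, keepdims=True))
        total = total + (k - 1) * (log_probs @ coding)
    return total

scores = samme_r_scores_np([np.random.randn(2, 3), np.random.randn(2, 3)])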
Example #45
  def testSquareBatch(self):
    with self.test_session(use_gpu=self._use_gpu):
      v_batch = np.array([[-1.0, -2.0, -3.0],
                          [-4.0, -5.0, -6.0]])
      mat_batch = np.array(
          [[[1.0, 0.0, 3.0],
            [0.0, 2.0, 0.0],
            [1.0, 0.0, 3.0]],
           [[4.0, 0.0, 4.0],
            [0.0, 5.0, 0.0],
            [2.0, 0.0, 6.0]]])

      mat_set_diag_batch = np.array(
          [[[-1.0, 0.0, 3.0],
            [0.0, -2.0, 0.0],
            [1.0, 0.0, -3.0]],
           [[-4.0, 0.0, 4.0],
            [0.0, -5.0, 0.0],
            [2.0, 0.0, -6.0]]])
      output = tf.matrix_set_diag(mat_batch, v_batch)
      self.assertEqual((2, 3, 3), output.get_shape())
      self.assertAllEqual(mat_set_diag_batch, output.eval())
 def _covariance(self):
   # Derivation: https://sachinruk.github.io/blog/von-Mises-Fisher/
   event_dim = self.event_shape[0].value
   if event_dim is None:
     raise ValueError('event shape must be statically known for _bessel_ive')
   # TODO(bjp): Enable this; numerically unstable.
   if event_dim > 2:
     raise ValueError('vMF covariance is numerically unstable for dim>2')
   concentration = self.concentration[..., tf.newaxis]
   safe_conc = tf.where(
       concentration > 0, concentration, tf.ones_like(concentration))
   h = (_bessel_ive(event_dim / 2, safe_conc) /
        _bessel_ive(event_dim / 2 - 1, safe_conc))
   intermediate = (
       tf.matmul(self.mean_direction[..., :, tf.newaxis],
                 self.mean_direction[..., tf.newaxis, :]) *
       (1 - event_dim * h / safe_conc - h**2)[..., tf.newaxis])
   cov = tf.matrix_set_diag(
       intermediate, tf.matrix_diag_part(intermediate) + (h / safe_conc))
   return tf.where(
       concentration[..., tf.newaxis] > tf.zeros_like(cov),
       cov,
       tf.linalg.eye(event_dim,
                     batch_shape=self.batch_shape_tensor()) / event_dim)
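For a single unbatched direction, the expression above reduces to mu mu^T * (1 - d*h/kappa - h^2) with h/kappa added to the diagonal, where h is the ratio of exponentially scaled Bessel functions. A NumPy/SciPy sketch (scipy.special.ive is an assumption here, standing in for the internal _bessel_ive helper):

import numpy as np
from scipy.special import ive

def vmf_covariance_np(mean_direction, concentration):
    d = mean_direction.shape[-1]
    h = ive(d / 2, concentration) / ive(d / 2 - 1, concentration)
    outer = np.outer(mean_direction, mean_direction)
    cov = outer * (1.0 - d * h / concentration - h ** 2)
    cov[np.diag_indices(d)] += h / concentration      # the matrix_set_diag step
    return cov

cov = vmf_covariance_np(np.array([1.0, 0.0]), concentration=2.0)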
  def testDefaultsYieldCorrectShapesAndValues(self):
    batch_shape = [4, 3]
    x_size = 3
    mvn_size = 5
    x_ = np.random.randn(*np.concatenate([batch_shape, [x_size]]))

    x = tf.constant(x_)
    mvn = tfp.trainable_distributions.multivariate_normal_tril(x, dims=mvn_size)
    scale = mvn.scale.to_dense()
    scale_upper = tf.matrix_set_diag(
        tf.matrix_band_part(scale, num_lower=0, num_upper=-1),
        tf.zeros(np.concatenate([batch_shape, [mvn_size]]), scale.dtype))
    scale_diag = tf.matrix_diag_part(scale)

    self.evaluate(tf.global_variables_initializer())
    [
        batch_shape_,
        event_shape_,
        scale_diag_,
        scale_upper_,
    ] = self.evaluate([
        mvn.batch_shape_tensor(),
        mvn.event_shape_tensor(),
        scale_diag,
        scale_upper,
    ])

    self.assertAllEqual(batch_shape, mvn.batch_shape)
    self.assertAllEqual(batch_shape, batch_shape_)

    self.assertAllEqual([mvn_size], mvn.event_shape)
    self.assertAllEqual([mvn_size], event_shape_)

    self.assertAllEqual(np.ones_like(scale_diag_, dtype=np.bool),
                        scale_diag_ > 0.)
    self.assertAllEqual(np.zeros_like(scale_upper_), scale_upper_)
  def weight_change_for_layer(self, meta_opt, l_idx, w_base, b_base, upper_h,
                              lower_h, upper_x, lower_x, prefix, include_bias):
    """Compute the change in weights for each layer.
    This computes something roughly analogous to a gradient.
    """
    reduce_upper_h = upper_h
    reduce_lower_h = lower_h

    BS = lower_x.shape.as_list()[0]

    change_w_terms = dict()

    # initial weight value normalized
    # normalize the weights per receptive-field, rather than per-matrix
    weight_scale = tf.rsqrt(
        tf.reduce_mean(w_base**2, axis=0, keepdims=True) + 1e-6)
    w_base *= weight_scale

    change_w_terms['w_base'] = w_base

    # this will act to decay larger weights towards zero
    change_w_terms['large_decay'] = w_base**2 * tf.sign(w_base)

    # term based on activations
    ux0 = upper_x - tf.reduce_mean(upper_x, axis=0, keepdims=True)
    uxs0 = ux0 * tf.rsqrt(tf.reduce_mean(ux0**2, axis=0, keepdims=True) + 1e-6)
    change_U = tf.matmul(uxs0, uxs0, transpose_a=True) / BS
    change_U /= tf.sqrt(float(change_U.shape.as_list()[0]))

    cw = tf.matmul(w_base, change_U)
    cw_scale = tf.rsqrt(tf.reduce_mean(cw**2 + 1e-8))
    cw *= cw_scale
    change_w_terms['decorr_x'] = cw

    # hebbian term
    lx0 = lower_x - tf.reduce_mean(lower_x, axis=0, keepdims=True)
    lxs0 = lx0 * tf.rsqrt(tf.reduce_mean(lx0**2, axis=0, keepdims=True) + 1e-6)
    cw = tf.matmul(lxs0, uxs0, transpose_a=True) / BS
    change_w_terms['hebb'] = -cw

    # 0th order term
    w_term = meta_opt.low_rank_readout(prefix + 'weight_readout_0', upper_h,
                                       lower_h)
    change_w_terms['0_order'] = w_term

    # rbf term (weight update scaled by distance from 0)
    w_term = meta_opt.low_rank_readout(prefix + 'weight_readout_rbf',
                                       reduce_upper_h, reduce_lower_h)
    change_w_terms['rbf'] = tf.exp(-w_base**2) * w_term

    # 1st order term (weight dependent update to weights)
    w_term = meta_opt.low_rank_readout(prefix + 'weight_readout_1',
                                       reduce_upper_h, reduce_lower_h)
    change_w_terms['1_order'] = w_base * w_term

    # more terms based on single layer readouts.
    for update_type in ['lin', 'sqr']:
      for h_source, h_source_name in [(reduce_upper_h, 'upper'),
                                      (reduce_lower_h, 'lower')]:
        structures = ['symm']
        if update_type == 'lin' and h_source_name == 'upper':
          structures += ['psd']
        for structure in structures:
          name = update_type + '_' + h_source_name + '_' + structure
          if structure == 'symm':
            change_U = meta_opt.low_rank_readout(prefix + name, h_source,
                                                 h_source)
            change_U = (change_U + tf.transpose(change_U)) / tf.sqrt(2.)
            change_U = tf.matrix_set_diag(change_U,
                                          tf.zeros(
                                              [change_U.shape.as_list()[0]]))
          elif structure == 'psd':
            change_U = meta_opt.low_rank_readout(
                prefix + name, h_source, None, psd=True)
          else:
            assert False
          change_U /= tf.sqrt(float(change_U.shape.as_list()[0]))

          if update_type == 'lin':
            sign_multiplier = tf.ones_like(w_base)
            w_base_l = w_base
          elif update_type == 'sqr':
            sign_multiplier = tf.sign(w_base)
            w_base_l = tf.sqrt(1. + w_base**2) - 1.

          if h_source_name == 'upper':
            cw = tf.matmul(w_base_l, change_U)  # [N^l-1 x N^l]
          elif h_source_name == 'lower':
            cw = tf.matmul(change_U, w_base_l)
          change_w_terms[name] = cw * sign_multiplier


    if prefix == 'forward':
      change_w = meta_opt.merge_change_w_forward(
          change_w_terms, global_prefix=prefix, prefix='l%d' % l_idx)
    elif prefix == 'backward':
      change_w = meta_opt.merge_change_w_backward(
          change_w_terms, global_prefix=prefix, prefix='l%d' % l_idx)
    else:
      assert False

    if not include_bias:
      return change_w

    change_b = tf.reduce_mean(meta_opt.bias_readout(upper_h), [0])

    # force nonlinearities to be exercised -- biases can't all be increased without bound
    change_b_mean = tf.reduce_mean(change_b)
    offset = -tf.nn.relu(-change_b_mean)
    change_b -= offset

    var = tf.reduce_mean(tf.square(change_b), [0], keepdims=True)
    change_b = change_b / tf.sqrt(0.5 + var)
    return change_w, change_b
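
# The 'symm' branch above symmetrizes a learned readout and removes its
# diagonal before applying it to the weights. A standalone sketch of that
# pattern (the random matrix stands in for `meta_opt.low_rank_readout`):
import tensorflow as tf

n = 4
raw = tf.random_normal([n, n])                    # stand-in for a learned readout
symm = (raw + tf.transpose(raw)) / tf.sqrt(2.)    # symmetrize, preserve scale
symm = tf.matrix_set_diag(symm, tf.zeros([n]))    # drop the self-interaction term
symm /= tf.sqrt(float(n))                         # roughly size-independent magnitude

with tf.Session() as sess:
  m = sess.run(symm)                              # symmetric, all-zero diagonal
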
 def _forward(self, x):
   diag = self._diag_bijector.forward(tf.matrix_diag_part(x))
   return tf.matrix_set_diag(x, diag)
def _add_diagonal_shift(matrix, shift):
  diag_plus_shift = tf.matrix_diag_part(matrix) + shift
  return tf.matrix_set_diag(matrix, diag_plus_shift)
 def _covariance(self):
   p = self.probs
   ret = -tf.matmul(p[..., None], p[..., None, :])
   return tf.matrix_set_diag(ret, self._variance())
def _add_diagonal_shift(matrix, shift):
  return tf.matrix_set_diag(
      matrix, tf.matrix_diag_part(matrix) + shift, name='add_diagonal_shift')
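
# A common use of this helper is adding a small "jitter" to a nearly singular
# kernel matrix so that its Cholesky factorization succeeds. A minimal sketch
# (the kernel values are made up for illustration):
import tensorflow as tf

kernel = tf.constant([[1.000, 0.999],
                      [0.999, 1.000]])
shifted = tf.matrix_set_diag(
    kernel, tf.matrix_diag_part(kernel) + 1e-6)   # same op as _add_diagonal_shift
chol = tf.cholesky(shifted)

with tf.Session() as sess:
  print(sess.run(chol))
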
Example #54
  def _sample_n(self, num_samples, seed=None, name=None):
    """Returns a Tensor of samples from an LKJ distribution.

    Args:
      num_samples: Python `int`. The number of samples to draw.
      seed: Python integer seed for RNG
      name: Python `str` name prefixed to Ops created by this function.

    Returns:
      samples: A Tensor of correlation matrices with shape `[n, B, D, D]`,
        where `B` is the shape of the `concentration` parameter, and `D`
        is the `dimension`.

    Raises:
      ValueError: If `dimension` is negative.
    """
    if self.dimension < 0:
      raise ValueError(
          'Cannot sample negative-dimension correlation matrices.')
    # Notation below: B is the batch shape, i.e., tf.shape(concentration)
    seed = seed_stream.SeedStream(seed, 'sample_lkj')
    with tf.name_scope('sample_lkj', name, [self.concentration]):
      if not self.concentration.dtype.is_floating:
        raise TypeError('The concentration argument should have floating type,'
                        ' not {}'.format(self.concentration.dtype.name))

      concentration = _replicate(num_samples, self.concentration)
      concentration_shape = tf.shape(concentration)
      if self.dimension <= 1:
        # For any dimension <= 1, there is only one possible correlation matrix.
        shape = tf.concat([
            concentration_shape, [self.dimension, self.dimension]], axis=0)
        return tf.ones(shape=shape, dtype=self.concentration.dtype)
      beta_conc = concentration + (self.dimension - 2.) / 2.
      beta_dist = beta.Beta(concentration1=beta_conc, concentration0=beta_conc)

      # Note that the sampler below deviates from [1], by doing the sampling in
      # cholesky space. This does not change the fundamental logic of the
      # sampler, but does speed up the sampling.

      # This is the correlation coefficient between the first two dimensions.
      # This is also `r` in reference [1].
      corr12 = 2. * beta_dist.sample(seed=seed()) - 1.

      # Below we construct the Cholesky of the initial 2x2 correlation matrix,
      # which is of the form:
      # [[1, 0], [r, sqrt(1 - r**2)]], where r is the correlation between the
      # first two dimensions.
      # This is the top-left corner of the cholesky of the final sample.
      first_row = tf.concat([
          tf.ones_like(corr12)[..., tf.newaxis],
          tf.zeros_like(corr12)[..., tf.newaxis]], axis=-1)
      second_row = tf.concat([
          corr12[..., tf.newaxis],
          tf.sqrt(1 - corr12**2)[..., tf.newaxis]], axis=-1)

      chol_result = tf.concat([
          first_row[..., tf.newaxis, :],
          second_row[..., tf.newaxis, :]], axis=-2)

      for n in range(2, self.dimension):
        # Loop invariant: on entry, result has shape B + [n, n]
        beta_conc -= 0.5
        # norm is y in reference [1].
        norm = beta.Beta(
            concentration1=n/2.,
            concentration0=beta_conc
        ).sample(seed=seed())
        # distance shape: B + [1] for broadcast
        distance = tf.sqrt(norm)[..., tf.newaxis]
        # direction is u in reference [1].
        # direction shape: B + [n]
        direction = _uniform_unit_norm(
            n, concentration_shape, self.concentration.dtype, seed)
        # raw_correlation is w in reference [1].
        raw_correlation = distance * direction  # shape: B + [n]

        # This is the next row in the cholesky of the result,
        # which differs from the construction in reference [1].
        # In the reference, the new row `z` = chol_result @ raw_correlation^T
        # = C @ raw_correlation^T (where, as shorthand, C = chol_result).
        # We prove that the below equation is the right row to add to the
        # cholesky, by showing equality with reference [1].
        # Let S be the sample constructed so far, and let `z` be as in
        # reference [1]. Then at this iteration, the new sample S' will be
        # [[S z^T]
        #  [z 1]]
        # In our case we have the cholesky decomposition factor C, so
        # we want our new row x (same size as z) to satisfy:
        #  [[S, z^T],     [[C, 0],     [[C^T, x^T],     [[C C^T,   C x^T      ],
        #   [z, 1  ]]  =   [x, k]]  @   [0,   k  ]]  =   [x C^T,   x x^T + k**2]]
        # Since C @ raw_correlation^T = z = C @ x^T, and C is invertible,
        # we have that x = raw_correlation. Also 1 = xx^T + k**2, so k
        # = sqrt(1 - xx^T) = sqrt(1 - |raw_correlation|**2) = sqrt(1 -
        # distance**2).
        new_row = tf.concat(
            [raw_correlation, tf.sqrt(1. - norm[..., tf.newaxis])], axis=-1)

        # Finally add this new row, by growing the cholesky of the result.
        chol_result = tf.concat([
            chol_result,
            tf.zeros_like(chol_result[..., 0][..., tf.newaxis])], axis=-1)

        chol_result = tf.concat(
            [chol_result, new_row[..., tf.newaxis, :]], axis=-2)

      result = tf.matmul(chol_result, chol_result, transpose_b=True)
      # The diagonal for a correlation matrix should always be ones. Due to
      # numerical instability the matmul might not achieve that, so manually set
      # these to ones.
      result = tf.matrix_set_diag(result, tf.ones(
          shape=tf.shape(result)[:-1], dtype=result.dtype.base_dtype))
      # This sampling algorithm can produce near-PSD matrices on which standard
      # algorithms such as `tf.cholesky` or `tf.linalg.self_adjoint_eigvals`
      # fail. Specifically, as documented in b/116828694, around 2% of trials
      # of 900,000 5x5 matrices (distributed according to 9 different
      # concentration parameter values) contained at least one matrix on which
      # the Cholesky decomposition failed.
      return result
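
# A minimal usage sketch for this sampler (illustrative; assumes the public
# `tfp.distributions.LKJ` wrapper): draws should be correlation matrices with
# a unit diagonal and off-diagonal entries in (-1, 1).
import tensorflow as tf
import tensorflow_probability as tfp

lkj = tfp.distributions.LKJ(dimension=4, concentration=2.)
samples = lkj.sample(3, seed=42)                  # shape [3, 4, 4]

with tf.Session() as sess:
  corr, diag = sess.run([samples, tf.matrix_diag_part(samples)])
# Each corr[i] is symmetric; `diag` is all ones up to float round-off.
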
Example #55
 def _covariance(self):
   x = self._variance_scale_term() * self._mean()
   return tf.matrix_set_diag(
       -tf.matmul(x[..., tf.newaxis],
                  x[..., tf.newaxis, :]),  # outer prod
       self._variance())
Example #56
 def testInvalidShape(self):
   with self.assertRaisesRegexp(ValueError, "must be at least rank 2"):
     tf.matrix_set_diag(0, [0])
   with self.assertRaisesRegexp(ValueError, "must be at least rank 1"):
     tf.matrix_set_diag([[0]], 0)
def make_tril_scale(
    loc=None,
    scale_tril=None,
    scale_diag=None,
    scale_identity_multiplier=None,
    shape_hint=None,
    validate_args=False,
    assert_positive=False,
    name=None):
  """Creates a LinearOperator representing a lower triangular matrix.

  Args:
    loc: Floating-point `Tensor`. This is used for inferring shape in the case
      where only `scale_identity_multiplier` is set.
    scale_tril: Floating-point `Tensor` representing the lower triangular
      matrix. `scale_tril` has shape [N1, N2, ...  k, k], which represents a
      k x k lower triangular matrix.
      When `None` no `scale_tril` term is added to the LinearOperator.
      The upper triangular elements above the diagonal are ignored.
    scale_diag: Floating-point `Tensor` representing the diagonal matrix.
      `scale_diag` has shape [N1, N2, ...  k], which represents a k x k
      diagonal matrix.
      When `None` no diagonal term is added to the LinearOperator.
    scale_identity_multiplier: floating point rank 0 `Tensor` representing a
      scaling done to the identity matrix.
      When `scale_identity_multiplier = scale_diag = scale_tril = None` then
      `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
      to `scale`.
    shape_hint: scalar integer `Tensor` representing a hint at the dimension of
      the identity matrix when only `scale_identity_multiplier` is set.
    validate_args: Python `bool` indicating whether arguments should be
      checked for correctness.
    assert_positive: Python `bool` indicating whether LinearOperator should be
      checked for being positive definite.
    name: Python `str` name given to ops managed by this object.

  Returns:
    `LinearOperator` representing a lower triangular matrix.

  Raises:
    ValueError:  If only `scale_identity_multiplier` is set and `loc` and
      `shape_hint` are both None.
  """

  def _maybe_attach_assertion(x):
    if not validate_args:
      return x
    if assert_positive:
      return control_flow_ops.with_dependencies([
          tf.assert_positive(
              tf.matrix_diag_part(x), message="diagonal part must be positive"),
      ], x)
    return control_flow_ops.with_dependencies([
        tf.assert_none_equal(
            tf.matrix_diag_part(x),
            tf.zeros([], x.dtype),
            message="diagonal part must be non-zero"),
    ], x)

  with tf.name_scope(
      name,
      "make_tril_scale",
      values=[loc, scale_diag, scale_identity_multiplier]):

    loc = _convert_to_tensor(loc, name="loc")
    scale_tril = _convert_to_tensor(scale_tril, name="scale_tril")
    scale_diag = _convert_to_tensor(scale_diag, name="scale_diag")
    scale_identity_multiplier = _convert_to_tensor(
        scale_identity_multiplier,
        name="scale_identity_multiplier")

  if scale_tril is not None:
    scale_tril = tf.matrix_band_part(scale_tril, -1, 0)  # Zero out TriU.
    tril_diag = tf.matrix_diag_part(scale_tril)
    if scale_diag is not None:
      tril_diag += scale_diag
    if scale_identity_multiplier is not None:
      tril_diag += scale_identity_multiplier[..., tf.newaxis]

    scale_tril = tf.matrix_set_diag(scale_tril, tril_diag)

    return tf.linalg.LinearOperatorLowerTriangular(
        tril=_maybe_attach_assertion(scale_tril),
        is_non_singular=True,
        is_self_adjoint=False,
        is_positive_definite=assert_positive)

  return make_diag_scale(
      loc=loc,
      scale_diag=scale_diag,
      scale_identity_multiplier=scale_identity_multiplier,
      shape_hint=shape_hint,
      validate_args=validate_args,
      assert_positive=assert_positive,
      name=name)
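
# For a `scale_tril` argument alone, the helper above behaves roughly like the
# following sketch built from documented TF ops (values are illustrative):
import tensorflow as tf

scale_tril = tf.constant([[2., 7.],
                          [1., 3.]])
tril = tf.matrix_band_part(scale_tril, -1, 0)     # zero out the ignored upper triangle
# A diagonal shift, if any, would be folded in here via tf.matrix_set_diag.
operator = tf.linalg.LinearOperatorLowerTriangular(
    tril, is_non_singular=True, is_self_adjoint=False)

with tf.Session() as sess:
  print(sess.run(operator.to_dense()))
  # [[2. 0.]
  #  [1. 3.]]
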
Example #58
def LaplacianMatrix(lengths, arcs, forest=False):
  r"""Returns the (root-augmented) Laplacian matrix for a batch of digraphs.

  Args:
    lengths: [B] vector of input sequence lengths.
    arcs: [B,M,M] tensor of arc potentials where entry b,t,s is the potential of
      the arc from s to t in the b'th digraph, while b,t,t is the potential of t
      as a root.  Entries b,t,s where t or s >= lengths[b] are ignored.
    forest: Whether to produce a Laplacian for trees or forests.

  Returns:
    [B,M,M] tensor L with the Laplacian of each digraph, padded with an identity
    matrix.  More concretely, the padding entries (t or s >= lengths[b]) are:
      L_{b,t,t} = 1.0
      L_{b,t,s} = 0.0
    Note that this "identity matrix padding" ensures that the determinant of
    each padded matrix equals the determinant of the unpadded matrix.  The
    non-padding entries (t,s < lengths[b]) depend on whether the Laplacian is
    constructed for trees or forests.  For trees:
      L_{b,t,0} = arcs[b,t,t]
      L_{b,t,t} = \sum_{s < lengths[b], t != s} arcs[b,t,s]
      L_{b,t,s} = -arcs[b,t,s]
    For forests:
      L_{b,t,t} = \sum_{s < lengths[b]} arcs[b,t,s]
      L_{b,t,s} = -arcs[b,t,s]
    See http://www.aclweb.org/anthology/D/D07/D07-1015.pdf for details, though
    note that our matrices are transposed from their notation.
  """
  check.Eq(arcs.get_shape().ndims, 3, 'arcs must be rank 3')
  dtype = arcs.dtype.base_dtype

  arcs_shape = tf.shape(arcs)
  batch_size = arcs_shape[0]
  max_length = arcs_shape[1]
  with tf.control_dependencies([tf.assert_equal(max_length, arcs_shape[2])]):
    valid_arc_bxmxm, valid_token_bxm = ValidArcAndTokenMasks(
        lengths, max_length, dtype=dtype)
  invalid_token_bxm = tf.constant(1, dtype=dtype) - valid_token_bxm

  # Zero out all invalid arcs, to avoid polluting bulk summations.
  arcs_bxmxm = arcs * valid_arc_bxmxm

  zeros_bxm = tf.zeros([batch_size, max_length], dtype)
  if not forest:
    # For trees, extract the root potentials and exclude them from the sums
    # computed below.
    roots_bxm = tf.matrix_diag_part(arcs_bxmxm)  # only defined for trees
    arcs_bxmxm = tf.matrix_set_diag(arcs_bxmxm, zeros_bxm)

  # Sum inbound arc potentials for each target token.  These sums will form
  # the diagonal of the Laplacian matrix.  Note that these sums are zero for
  # invalid tokens, since their arc potentials were masked out above.
  sums_bxm = tf.reduce_sum(arcs_bxmxm, 2)

  if forest:
    # For forests, zero out the root potentials after computing the sums above
    # so we don't cancel them out when we subtract the arc potentials.
    arcs_bxmxm = tf.matrix_set_diag(arcs_bxmxm, zeros_bxm)

  # The diagonal of the result is the combination of the arc sums, which are
  # non-zero only on valid tokens, and the invalid token indicators, which are
  # non-zero only on invalid tokens.  Note that the latter form the diagonal
  # of the identity matrix padding.
  diagonal_bxm = sums_bxm + invalid_token_bxm

  # Combine sums and negative arc potentials.  Note that the off-diagonal
  # padding entries will be zero thanks to the arc mask.
  laplacian_bxmxm = tf.matrix_diag(diagonal_bxm) - arcs_bxmxm

  if not forest:
    # For trees, replace the first column with the root potentials.
    roots_bxmx1 = tf.expand_dims(roots_bxm, 2)
    laplacian_bxmxm = tf.concat([roots_bxmx1, laplacian_bxmxm[:, :, 1:]], 2)

  return laplacian_bxmxm
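
# A tiny worked example of the tree case, reproducing the docstring formulas
# in plain NumPy (no batching or padding, i.e. lengths == [3]):
import numpy as np

arcs = np.array([[1., 2., 3.],
                 [4., 5., 6.],
                 [7., 8., 9.]])
roots = np.diag(arcs).copy()                  # diagonal holds the root potentials
off_diag = arcs - np.diag(np.diag(arcs))      # arc potentials with diagonal zeroed

# Inbound sums on the diagonal, negated arc potentials elsewhere ...
laplacian = np.diag(off_diag.sum(axis=1)) - off_diag
# ... and the first column replaced by the root potentials.
laplacian[:, 0] = roots
print(laplacian)
# [[ 1. -2. -3.]
#  [ 5. 10. -6.]
#  [ 9. -8. 15.]]
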
 def _inverse(self, y):
   diag = self._diag_bijector.inverse(tf.matrix_diag_part(y))
   return tf.matrix_set_diag(y, diag)
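
# These _forward/_inverse methods come from a diagonal-transforming bijector.
# A minimal usage sketch (illustrative; assumes the public
# `tfp.bijectors.TransformDiagonal` wrapper), applying Softplus to the
# diagonal while leaving off-diagonal entries untouched:
import tensorflow as tf
import tensorflow_probability as tfp

b = tfp.bijectors.TransformDiagonal(diag_bijector=tfp.bijectors.Softplus())
x = tf.constant([[-1., 5.],
                 [2., -3.]])
y = b.forward(x)          # softplus applied to the diagonal only
x_back = b.inverse(y)     # recovers the original diagonal

with tf.Session() as sess:
  print(sess.run([y, x_back]))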