Example #1
  def get_observation_model(self, times):
    """Construct observation model matrix from VARMA parameters.

    Args:
      times: A [batch size] vector indicating the times observation models are
          requested for. Unused.
    Returns:
      the observation model matrix. It has shape
        [self.num_features, self.state_dimension].
    """
    del times  # StateSpaceModel will broadcast along the batch dimension
    if self.ar_order > self.ma_order or self.state_num_blocks < 2:
      return array_ops.pad(
          linalg_ops.eye(self.num_features, dtype=self.dtype),
          [[0, 0], [0, self.num_features * (self.state_num_blocks - 1)]],
          name="observation_model")
    else:
      # Add a second observed component which "catches" the accumulated moving
      # average errors as they reach the end of the state. If ar_order >
      # ma_order, this is unnecessary, since accumulated errors cycle naturally.
      return array_ops.concat(
          [
              array_ops.pad(
                  linalg_ops.eye(self.num_features, dtype=self.dtype),
                  [[0, 0], [0,
                            self.num_features * (self.state_num_blocks - 2)]]),
              linalg_ops.eye(self.num_features, dtype=self.dtype)
          ],
          axis=1,
          name="observation_model")
Example #2
 def Test(self):
   eye_np = np.eye(num_rows, M=num_columns, dtype=dtype.as_numpy_dtype)
   if batch_shape is not None:
     eye_np = np.tile(eye_np, batch_shape + [1, 1])
   for use_placeholder in False, True:
     if use_placeholder and (num_columns is None or batch_shape is None):
       return
     with self.test_session(use_gpu=True) as sess:
       if use_placeholder:
         num_rows_placeholder = array_ops.placeholder(
             dtypes.int32, name="num_rows")
         num_columns_placeholder = array_ops.placeholder(
             dtypes.int32, name="num_columns")
         batch_shape_placeholder = array_ops.placeholder(
             dtypes.int32, name="batch_shape")
         eye = linalg_ops.eye(
             num_rows_placeholder,
             num_columns=num_columns_placeholder,
             batch_shape=batch_shape_placeholder,
             dtype=dtype)
         eye_tf = sess.run(
             eye,
             feed_dict={
                 num_rows_placeholder: num_rows,
                 num_columns_placeholder: num_columns,
                 batch_shape_placeholder: batch_shape
             })
       else:
         eye_tf = linalg_ops.eye(
             num_rows,
             num_columns=num_columns,
             batch_shape=batch_shape,
             dtype=dtype).eval()
       self.assertAllEqual(eye_np, eye_tf)
Example #3
 def testShapeInferenceStaticBatch(self):
   batch_shape = (2, 3)
   self.assertEqual(
       (2, 3, 2, 2),
       linalg_ops.eye(num_rows=2, batch_shape=batch_shape).shape)
   self.assertEqual(
       (2, 3, 2, 3),
       linalg_ops.eye(
           num_rows=2, num_columns=3, batch_shape=batch_shape).shape)
Example #4
def transition_power_test_template(test_case, model, num_steps):
  """Tests the transition_to_powers function of a state space model."""
  transition_matrix = ops.convert_to_tensor(
      model.get_state_transition(), dtype=model.dtype)
  step_number = array_ops.placeholder(shape=[], dtype=dtypes.int64)
  state_dimension = transition_matrix.get_shape()[0].value
  previous_matrix = array_ops.placeholder(
      shape=[state_dimension, state_dimension], dtype=transition_matrix.dtype)
  true_single_step_update = math_ops.matmul(previous_matrix,
                                            transition_matrix)
  model_output_tensor = model.transition_to_powers(powers=array_ops.stack(
      [step_number, step_number]))
  with test_case.test_session():
    starting_matrix = linalg_ops.eye(
        state_dimension, batch_shape=array_ops.shape(num_steps)).eval()
    evaled_current_matrix = starting_matrix
    for iteration_number in range(num_steps):
      model_output = model_output_tensor.eval(
          feed_dict={step_number: iteration_number})
      test_case.assertAllClose(
          evaled_current_matrix,
          model_output[0],
          rtol=1e-8 if evaled_current_matrix.dtype == numpy.float64 else 1e-4)
      evaled_current_matrix = true_single_step_update.eval(
          feed_dict={previous_matrix: evaled_current_matrix})
Example #5
  def _underdetermined(op, grad):
    """Gradients for the underdetermined case of MatrixSolveLs.

    This is the backprop for the solution to the normal equations of the second
    kind:
      X = F(A, B) = A * (A*A^T + lambda*I)^{-1} * B
    that (for lambda=0) solves the least squares problem
      min ||X||_F subject to A*X = B.
    """
    a = op.inputs[0]
    b = op.inputs[1]
    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
    a_shape = array_ops.shape(a)
    batch_shape = a_shape[:-2]
    m = a_shape[-2]

    identity = linalg_ops.eye(m, batch_shape=batch_shape, dtype=a.dtype)
    gramian = math_ops.matmul(a, a, adjoint_b=True) + l2_regularizer * identity
    chol = linalg_ops.cholesky(gramian)
    grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad))
    # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
    tmp = linalg_ops.cholesky_solve(chol, b)
    a1 = math_ops.matmul(tmp, a, adjoint_a=True)
    a1 = -math_ops.matmul(grad_b, a1)
    a2 = grad - math_ops.matmul(a, grad_b, adjoint_a=True)
    a2 = math_ops.matmul(tmp, a2, adjoint_b=True)
    grad_a = a1 + a2
    return (grad_a, grad_b, None)
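For reference, a minimal NumPy sketch (an illustrative assumption, not part of the TensorFlow sources) of the identity this gradient relies on: with lambda = 0 and a full-row-rank A, X = A^T (A A^T)^{-1} B is the minimum-norm solution of A X = B.

import numpy as np

np.random.seed(0)
a = np.random.randn(3, 5)                     # underdetermined: more columns than rows
b = np.random.randn(3, 2)
gramian = a @ a.T                             # A A^T (lambda = 0)
x = a.T @ np.linalg.solve(gramian, b)
print(np.allclose(a @ x, b))                                  # X satisfies A X = B
print(np.allclose(x, np.linalg.lstsq(a, b, rcond=None)[0]))   # and has minimum norm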
Example #6
  def _compute_power_svd(self, var, mat_g, mat_g_size, alpha, mat_h_slot_name):
    """Computes mat_h = mat_g^alpha using svd. mat_g is a symmetric PSD matrix.

    Args:
      var: the variable we are updating.
      mat_g: the symmetric PSD matrix whose power is to be computed.
      mat_g_size: size of mat_g
      alpha: a real number
      mat_h_slot_name: name of slot to store the power, if needed.

    Returns:
      mat_h = mat_g^alpha

    Stores mat_h in the appropriate slot, if it exists.
    Note that mat_g is PSD. So we could use linalg_ops.self_adjoint_eig.
    """
    if mat_g_size == 1:
      mat_h = math_ops.pow(mat_g + self._epsilon, alpha)
    else:
      damping = self._epsilon * linalg_ops.eye(math_ops.to_int32(mat_g_size))
      diag_d, mat_u, mat_v = linalg_ops.svd(mat_g + damping, full_matrices=True)
      mat_h = math_ops.matmul(
          mat_v * math_ops.pow(math_ops.maximum(diag_d, self._epsilon), alpha),
          array_ops.transpose(mat_u))
    if mat_h_slot_name is not None:
      return state_ops.assign(self.get_slot(var, mat_h_slot_name), mat_h)
    return mat_h
Example #7
  def _operator_and_matrix(self, build_info, dtype, use_placeholder):
    shape = list(build_info.shape)
    assert shape[-1] == shape[-2]

    batch_shape = shape[:-2]
    num_rows = shape[-1]

    # Uniform values that are at least length 1 from the origin.  Allows the
    # operator to be well conditioned.
    # Shape batch_shape
    multiplier = linear_operator_test_util.random_sign_uniform(
        shape=batch_shape, minval=1., maxval=2., dtype=dtype)


    # Nothing to feed since LinearOperatorScaledIdentity takes no Tensor args.
    lin_op_multiplier = multiplier

    if use_placeholder:
      lin_op_multiplier = array_ops.placeholder_with_default(
          multiplier, shape=None)

    operator = linalg_lib.LinearOperatorScaledIdentity(
        num_rows, lin_op_multiplier)

    multiplier_matrix = array_ops.expand_dims(
        array_ops.expand_dims(multiplier, -1), -1)
    matrix = multiplier_matrix * linalg_ops.eye(
        num_rows, batch_shape=batch_shape, dtype=dtype)

    return operator, matrix
Example #8
  def test_inv_update_thunks(self):
    """Ensures inverse update ops run once per global_step."""
    with self._graph.as_default(), self.test_session() as sess:
      fisher_estimator = estimator.FisherEstimator(
          damping_fn=lambda: 0.2,
          variables=[self.weights],
          layer_collection=self.layer_collection,
          cov_ema_decay=0.0)

      # Construct op that updates one inverse per global step.
      global_step = training_util.get_or_create_global_step()
      inv_matrices = [
          matrix
          for fisher_factor in self.layer_collection.get_factors()
          for matrix in fisher_factor._inverses_by_damping.values()
      ]
      inv_update_op_thunks = fisher_estimator.inv_update_thunks
      inv_update_op = control_flow_ops.case(
          [(math_ops.equal(global_step, i), thunk)
           for i, thunk in enumerate(inv_update_op_thunks)])
      increment_global_step = global_step.assign_add(1)

      sess.run(variables.global_variables_initializer())
      initial_inv_values = sess.run(inv_matrices)

      # Ensure there's one update per inverse matrix. This is true as long as
      # there's no fan-in/fan-out or parameter re-use.
      self.assertEqual(len(inv_matrices), len(inv_update_op_thunks))

      # Test is a no-op if there is only 1 inverse matrix.
      assert len(inv_matrices) > 1

      # Assign each covariance matrix a value other than the identity. This
      # ensures that the inverse matrices are updated to something different as
      # well.
      cov_matrices = [
          fisher_factor.get_cov()
          for fisher_factor in self.layer_collection.get_factors()
      ]
      sess.run([
          cov_matrix.assign(2 * linalg_ops.eye(int(cov_matrix.shape[0])))
          for cov_matrix in cov_matrices
      ])

      for i in range(len(inv_matrices)):
        # Compare new and old inverse values
        new_inv_values = sess.run(inv_matrices)
        is_inv_equal = [
            np.allclose(initial_inv_value, new_inv_value)
            for (initial_inv_value,
                 new_inv_value) in zip(initial_inv_values, new_inv_values)
        ]
        num_inv_equal = sum(is_inv_equal)

        # Ensure exactly one inverse matrix changes per step.
        self.assertEqual(num_inv_equal, len(inv_matrices) - i)

        # Run all inverse update ops.
        sess.run(inv_update_op)
        sess.run(increment_global_step)
Example #9
  def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder):
    shape = list(shape)
    assert shape[-1] == shape[-2]

    batch_shape = shape[:-2]
    num_rows = shape[-1]

    # Uniform values that are at least length 1 from the origin.  Allows the
    # operator to be well conditioned.
    # Shape batch_shape
    multiplier = linear_operator_test_util.random_sign_uniform(
        shape=batch_shape, minval=1., maxval=2., dtype=dtype)

    operator = linalg_lib.LinearOperatorScaledIdentity(num_rows, multiplier)

    # Nothing to feed since LinearOperatorScaledIdentity takes no Tensor args.
    if use_placeholder:
      multiplier_ph = array_ops.placeholder(dtype=dtype)
      multiplier = multiplier.eval()
      operator = linalg_lib.LinearOperatorScaledIdentity(
          num_rows, multiplier_ph)
      feed_dict = {multiplier_ph: multiplier}
    else:
      feed_dict = None

    multiplier_matrix = array_ops.expand_dims(
        array_ops.expand_dims(multiplier, -1), -1)
    mat = multiplier_matrix * linalg_ops.eye(
        num_rows, batch_shape=batch_shape, dtype=dtype)

    return operator, mat, feed_dict
Example #10
def power_sums_tensor(array_size, power_matrix, multiplier):
  r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..(array_size + 1).

  Args:
    array_size: The number of non-trivial sums to pre-compute.
    power_matrix: The "A" matrix above.
    multiplier: The "B" matrix above.
  Returns:
    A Tensor with S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
      S[0] is the zero matrix
      S[1] is B
      S[2] is A B A^T + B
      ...and so on
  """
  array_size = math_ops.cast(array_size, dtypes.int32)
  power_matrix = ops.convert_to_tensor(power_matrix)
  identity_like_power_matrix = linalg_ops.eye(
      array_ops.shape(power_matrix)[0], dtype=power_matrix.dtype)
  identity_like_power_matrix.set_shape(
      ops.convert_to_tensor(power_matrix).get_shape())
  transition_powers = functional_ops.scan(
      lambda previous_power, _: math_ops.matmul(previous_power, power_matrix),
      math_ops.range(array_size - 1),
      initializer=identity_like_power_matrix)
  summed = math_ops.cumsum(
      array_ops.concat([
          array_ops.expand_dims(multiplier, 0), math_ops.matmul(
              batch_times_matrix(transition_powers, multiplier),
              transition_powers,
              adjoint_b=True)
      ], 0))
  return array_ops.concat(
      [array_ops.expand_dims(array_ops.zeros_like(multiplier), 0), summed], 0)
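A rough NumPy reference for the quantity being pre-computed (an illustrative assumption, not the library code): S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T, so S[0] = 0, S[1] = B, S[2] = A B A^T + B.

import numpy as np

def power_sums_reference(array_size, a, b):
  sums = [np.zeros_like(b)]
  power = np.eye(a.shape[0])
  for _ in range(array_size + 1):
    sums.append(sums[-1] + power @ b @ power.T)
    power = a @ power
  return np.stack(sums)

a = np.array([[0.9, 0.1], [0.0, 0.8]])
b = np.eye(2)
s = power_sums_reference(3, a, b)
print(np.allclose(s[1], b))                   # S[1] = B
print(np.allclose(s[2], a @ b @ a.T + b))     # S[2] = A B A^T + B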
Example #11
  def _overdetermined(op, grad):
    """Gradients for the overdetermined case of MatrixSolveLs.

    This is the backprop for the solution to the normal equations of the first
    kind:
       X = F(A, B) = (A^T * A + lambda * I)^{-1} * A^T * B
    which solves the least squares problem
       min ||A * X - B||_F^2 + lambda ||X||_F^2.
    """
    a = op.inputs[0]
    b = op.inputs[1]
    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
    x = op.outputs[0]
    a_shape = array_ops.shape(a)
    batch_shape = a_shape[:-2]
    n = a_shape[-1]

    identity = linalg_ops.eye(n, batch_shape=batch_shape, dtype=a.dtype)
    gramian = math_ops.matmul(a, a, adjoint_a=True) + l2_regularizer * identity
    chol = linalg_ops.cholesky(gramian)
    # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
    z = linalg_ops.cholesky_solve(chol, grad)
    xzt = math_ops.matmul(x, z, adjoint_b=True)
    zx_sym = xzt + array_ops.matrix_transpose(xzt)
    grad_a = -math_ops.matmul(a, zx_sym) + math_ops.matmul(b, z, adjoint_b=True)
    grad_b = math_ops.matmul(a, z)
    return (grad_a, grad_b, None)
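As a sanity check, a small NumPy sketch (an assumption for illustration) of the forward map this gradient corresponds to: for lambda = 0, X = (A^T A)^{-1} A^T B coincides with the ordinary least-squares solution.

import numpy as np

np.random.seed(0)
a = np.random.randn(6, 3)                     # overdetermined: more rows than columns
b = np.random.randn(6, 2)
lam = 0.0
gramian = a.T @ a + lam * np.eye(3)
x = np.linalg.solve(gramian, a.T @ b)
print(np.allclose(x, np.linalg.lstsq(a, b, rcond=None)[0]))   # True for lambda = 0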
Example #12
 def test_non_batch_2x2(self):
   num_rows = 2
   dtype = np.float32
   np_eye = np.eye(num_rows).astype(dtype)
   with self.test_session():
     eye = linalg_ops.eye(num_rows, dtype=dtype)
     self.assertAllEqual((num_rows, num_rows), eye.get_shape())
     self.assertAllEqual(np_eye, eye.eval())
Example #13
  def _create_slots(self, var_list):
    for v in var_list:
      with ops.colocate_with(v):
        _ = self._zeros_slot(v, "gbar", self._name)
        shape = np.array(v.get_shape())
        for i, d in enumerate(shape):
          d_tensor = ops.convert_to_tensor(d)
          if d <= self._max_matrix_size:
            mat_g_init = array_ops.zeros_like(linalg_ops.eye(d_tensor))
            if self._svd_interval > 1:
              _ = self._get_or_make_slot(v, linalg_ops.eye(d_tensor),
                                         "H_" + str(i), self._name)
          else:
            mat_g_init = array_ops.zeros([d_tensor])

          _ = self._get_or_make_slot(v, mat_g_init, "Gbar_" + str(i),
                                     self._name)
Example #14
 def test_non_batch_0x0(self):
   num_rows = 0
   dtype = np.int64
   np_eye = np.eye(num_rows).astype(dtype)
   with self.test_session(use_gpu=True):
     eye = linalg_ops.eye(num_rows, dtype=dtype)
     self.assertAllEqual((num_rows, num_rows), eye.get_shape())
     self.assertAllEqual(np_eye, eye.eval())
Example #15
  def _verifyLu(self, x, output_idx_type=dtypes.int64):
    # Verify that Px = LU.
    lu, perm = linalg_ops.lu(x, output_idx_type=output_idx_type)

    # Prepare the lower factor of shape num_rows x num_rows
    lu_shape = np.array(lu.shape.as_list())
    batch_shape = lu_shape[:-2]
    num_rows = lu_shape[-2]
    num_cols = lu_shape[-1]

    lower = array_ops.matrix_band_part(lu, -1, 0)

    if num_rows > num_cols:
      eye = linalg_ops.eye(
          num_rows, batch_shape=batch_shape, dtype=lower.dtype)
      lower = array_ops.concat([lower, eye[..., num_cols:]], axis=-1)
    elif num_rows < num_cols:
      lower = lower[..., :num_rows]

    # Fill the diagonal with ones.
    ones_diag = array_ops.ones(
        np.append(batch_shape, num_rows), dtype=lower.dtype)
    lower = array_ops.matrix_set_diag(lower, ones_diag)

    # Prepare the upper factor.
    upper = array_ops.matrix_band_part(lu, 0, -1)

    verification = math_ops.matmul(lower, upper)

    # Permute the rows of the product of the triangular factors.
    if num_rows > 0:
      # Reshape the product of the triangular factors and permutation indices
      # to a single batch dimension. This makes it easy to apply
      # invert_permutation and gather_nd ops.
      perm_reshaped = array_ops.reshape(perm, [-1, num_rows])
      verification_reshaped = array_ops.reshape(verification,
                                                [-1, num_rows, num_cols])
      # Invert the permutation in each batch.
      inv_perm_reshaped = map_fn.map_fn(array_ops.invert_permutation,
                                        perm_reshaped)
      batch_size = perm_reshaped.shape.as_list()[0]
      # Prepare the batch indices with the same shape as the permutation.
      # The corresponding batch index is paired with each of the `num_rows`
      # permutation indices.
      batch_indices = math_ops.cast(
          array_ops.broadcast_to(
              math_ops.range(batch_size)[:, None], perm_reshaped.shape),
          dtype=output_idx_type)
      permuted_verification_reshaped = array_ops.gather_nd(
          verification_reshaped,
          array_ops.stack([batch_indices, inv_perm_reshaped], axis=-1))

      # Reshape the verification matrix back to the original shape.
      verification = array_ops.reshape(permuted_verification_reshaped,
                                       lu_shape)

    self._verifyLuBase(x, lower, upper, perm, verification,
                       output_idx_type)
Example #16
  def test_cholesky(self):
    z = random_ops.random_normal([2, 3, 3])
    x = (math_ops.matmul(z, array_ops.matrix_transpose(z))  # Ensure pos. def.
         + linalg_ops.eye(3))  # Ensure well-conditioned.

    def loop_fn(i):
      return linalg_ops.cholesky(array_ops.gather(x, i))

    self._test_loop_fn(loop_fn, 2)
Example #17
 def test_non_batch_0x2(self):
   num_rows = 0
   num_columns = 2
   dtype = np.int64
   np_eye = np.eye(num_rows, num_columns).astype(dtype)
   with self.test_session():
     eye = linalg_ops.eye(num_rows, num_columns=num_columns, dtype=dtype)
     self.assertAllEqual((num_rows, num_columns), eye.get_shape())
     self.assertAllEqual(np_eye, eye.eval())
Example #18
def TriAngInvCompositeGrad(l, grad):
  num_rows = array_ops.shape(l)[-1]
  batch_shape = array_ops.shape(l)[:-2]
  l_inverse = linalg_ops.matrix_triangular_solve(l,
                                                 linalg_ops.eye(
                                                     num_rows,
                                                     batch_shape=batch_shape,
                                                     dtype=l.dtype))
  return _GradWithInverseL(l, l_inverse, grad)
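The eye argument here is what turns a triangular solve into an explicit inverse; a tiny SciPy/NumPy analogue of the same idea (an illustrative assumption, not the TensorFlow gradient code):

import numpy as np
from scipy.linalg import solve_triangular

l = np.array([[2.0, 0.0],
              [1.0, 3.0]])
l_inverse = solve_triangular(l, np.eye(2), lower=True)   # solves L X = I column by column
print(np.allclose(l @ l_inverse, np.eye(2)))             # True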
Example #19
 def test_non_batch_2x3(self):
   num_rows = 2
   num_columns = 3
   dtype = np.float32
   np_eye = np.eye(num_rows, num_columns).astype(dtype)
   with self.test_session(use_gpu=True):
     eye = linalg_ops.eye(num_rows, num_columns=num_columns, dtype=dtype)
     self.assertAllEqual((num_rows, num_columns), eye.get_shape())
     self.assertAllEqual(np_eye, eye.eval())
Example #20
 def test_eye_no_placeholder(self, num_rows, num_columns, batch_shape, dtype):
   eye_np = np.eye(num_rows, M=num_columns, dtype=dtype.as_numpy_dtype)
   if batch_shape is not None:
     eye_np = np.tile(eye_np, batch_shape + [1, 1])
   eye_tf = self.evaluate(linalg_ops.eye(
       num_rows,
       num_columns=num_columns,
       batch_shape=batch_shape,
       dtype=dtype))
   self.assertAllEqual(eye_np, eye_tf)
Example #21
  def testLossFunctionWithoutName(self):
    """Ensure loss functions get unique names if 'name' not specified."""
    with ops.Graph().as_default():
      logits = linalg_ops.eye(2)
      lc = layer_collection.LayerCollection()

      # Create a new loss function with default names.
      lc.register_categorical_predictive_distribution(logits)
      lc.register_categorical_predictive_distribution(logits)
      self.assertEqual(2, len(lc.losses))
Example #22
def _ctc_state_trans(label_seq):
  """Compute CTC alignment model transition matrix.

  Args:
    label_seq: tensor of shape [batch_size, max_seq_length]

  Returns:
    tensor of shape [batch_size, states, states] with a state transition matrix
    computed for each sequence of the batch.
  """

  with ops.name_scope("ctc_state_trans"):
    label_seq = ops.convert_to_tensor(label_seq, name="label_seq")
    batch_size = _get_dim(label_seq, 0)
    num_labels = _get_dim(label_seq, 1)

    num_label_states = num_labels + 1
    num_states = 2 * num_label_states

    label_states = math_ops.range(num_label_states)
    blank_states = label_states + num_label_states

    # Start state to first label.
    start_to_label = [[1, 0]]

    # Blank to label transitions.
    blank_to_label = array_ops.stack([label_states[1:], blank_states[:-1]], 1)

    # Label to blank transitions.
    label_to_blank = array_ops.stack([blank_states, label_states], 1)

    # Scatter transitions that don't depend on sequence.
    indices = array_ops.concat(
        [start_to_label, blank_to_label, label_to_blank], 0)
    values = array_ops.ones([_get_dim(indices, 0)])
    trans = array_ops.scatter_nd(
        indices, values, shape=[num_states, num_states])
    trans += linalg_ops.eye(num_states)  # Self-loops.

    # Label to label transitions. Disallow transitions between repeated labels
    # with no blank state in between.
    batch_idx = array_ops.zeros_like(label_states[2:])
    indices = array_ops.stack(
        [batch_idx, label_states[2:], label_states[1:-1]], 1)
    indices = array_ops.tile(
        array_ops.expand_dims(indices, 0), [batch_size, 1, 1])
    batch_idx = array_ops.expand_dims(math_ops.range(batch_size), 1) * [1, 0, 0]
    indices += array_ops.expand_dims(batch_idx, 1)
    repeats = math_ops.equal(label_seq[:, :-1], label_seq[:, 1:])
    values = 1.0 - math_ops.cast(repeats, dtypes.float32)
    batched_shape = [batch_size, num_states, num_states]
    label_to_label = array_ops.scatter_nd(indices, values, batched_shape)

    return array_ops.expand_dims(trans, 0) + label_to_label
Example #23
 def __call__(self, shape, dtype=None, partition_info=None):
   full_shape = shape if partition_info is None else partition_info.full_shape
   if len(full_shape) != 2:
     raise ValueError(
         "Identity matrix initializer can only be used for 2D matrices.")
   if dtype is None:
     dtype = self.dtype
   initializer = linalg_ops.eye(*full_shape, dtype=dtype)
   if partition_info is not None:
     initializer = array_ops.slice(initializer, partition_info.var_offset,
                                   shape)
   return self.gain * initializer
Example #24
  def _operator_and_matrix(self, build_info, dtype, use_placeholder):
    shape = list(build_info.shape)
    assert shape[-1] == shape[-2]

    batch_shape = shape[:-2]
    num_rows = shape[-1]

    operator = linalg_lib.LinearOperatorIdentity(
        num_rows, batch_shape=batch_shape, dtype=dtype)
    mat = linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=dtype)

    return operator, mat
Example #25
 def test_1x3_batch_4x4(self):
   num_rows = 4
   batch_shape = [1, 3]
   dtype = np.float32
   np_eye = np.eye(num_rows).astype(dtype)
   with self.test_session():
     eye = linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=dtype)
     self.assertAllEqual(batch_shape + [num_rows, num_rows], eye.get_shape())
     eye_v = eye.eval()
     for i in range(batch_shape[0]):
       for j in range(batch_shape[1]):
         self.assertAllEqual(np_eye, eye_v[i, j, :, :])
Example #26
def _matrix_exp_pade3(matrix):
  """3rd-order Pade approximant for matrix exponential."""
  b = [120.0, 60.0, 12.0]
  b = [constant_op.constant(x, matrix.dtype) for x in b]
  ident = linalg_ops.eye(array_ops.shape(matrix)[-2],
                         batch_shape=array_ops.shape(matrix)[:-2],
                         dtype=matrix.dtype)
  matrix_2 = math_ops.matmul(matrix, matrix)
  tmp = matrix_2 + b[1] * ident
  matrix_u = math_ops.matmul(matrix, tmp)
  matrix_v = b[2] * matrix_2 + b[0] * ident
  return matrix_u, matrix_v
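For context, a hedged NumPy sketch (an assumption about how the (U, V) pair is consumed downstream, not code from this module): the degree-3 Pade approximant of exp(A) is obtained by solving (V - U) R = (V + U), which is accurate for matrices of small norm.

import math
import numpy as np

def pade3(matrix):
  b = [120.0, 60.0, 12.0]
  ident = np.eye(matrix.shape[-1])
  matrix_2 = matrix @ matrix
  matrix_u = matrix @ (matrix_2 + b[1] * ident)
  matrix_v = b[2] * matrix_2 + b[0] * ident
  return matrix_u, matrix_v

a = 0.01 * np.array([[1.0, 2.0], [0.5, -1.0]])
u, v = pade3(a)
approx = np.linalg.solve(v - u, v + u)
taylor = sum(np.linalg.matrix_power(a, k) / math.factorial(k) for k in range(12))
print(np.allclose(approx, taylor, atol=1e-12))   # agrees with the series for small norms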
Example #27
 def test_1x3_batch_0x0(self):
   num_rows = 0
   batch_shape = [1, 3]
   dtype = np.float32
   np_eye = np.eye(num_rows).astype(dtype)
   with self.test_session(use_gpu=True):
     eye = linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=dtype)
     self.assertAllEqual((1, 3, 0, 0), eye.get_shape())
     eye_v = eye.eval()
     for i in range(batch_shape[0]):
       for j in range(batch_shape[1]):
         self.assertAllEqual(np_eye, eye_v[i, j, :, :])
Example #28
 def test_posterior_from_prior_state_multivariate_3d(self):
   self._posterior_from_prior_state_test_template(
       state=constant_op.constant([[1.9, 1., 5.]]),
       state_var=constant_op.constant(
           [[[200., 0., 1.], [0., 2000., 0.], [1., 0., 40000.]]]),
       observation=constant_op.constant([[1., 1., 3.]]),
       observation_model=constant_op.constant(
           [[[0.5, 0., 0.],
             [0., 10., 0.],
             [0., 0., 100.]]]),
       observation_noise=linalg_ops.eye(3) / 10000.,
       expected_state=numpy.array([[2., .1, .03]]),
       expected_state_var=numpy.zeros([1, 3, 3]))
Example #29
def _matrix_exp_pade5(matrix):
  """5th-order Pade approximant for matrix exponential."""
  b = [30240.0, 15120.0, 3360.0, 420.0, 30.0]
  b = [constant_op.constant(x, matrix.dtype) for x in b]
  ident = linalg_ops.eye(array_ops.shape(matrix)[-2],
                         batch_shape=array_ops.shape(matrix)[:-2],
                         dtype=matrix.dtype)
  matrix_2 = math_ops.matmul(matrix, matrix)
  matrix_4 = math_ops.matmul(matrix_2, matrix_2)
  tmp = matrix_4 + b[3] * matrix_2 + b[1] * ident
  matrix_u = math_ops.matmul(matrix, tmp)
  matrix_v = b[4] * matrix_4 + b[2] * matrix_2 + b[0] * ident
  return matrix_u, matrix_v
Example #30
  def testRegisterCategoricalPredictiveDistribution(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      logits = linalg_ops.eye(2)

      lc = layer_collection.LayerCollection()
      lc.register_categorical_predictive_distribution(logits, seed=200)
      single_loss = sess.run(lc.total_sampled_loss())

      lc2 = layer_collection.LayerCollection()
      lc2.register_categorical_predictive_distribution(logits, seed=200)
      lc2.register_categorical_predictive_distribution(logits, seed=200)
      double_loss = sess.run(lc2.total_sampled_loss())
      self.assertAlmostEqual(2 * single_loss, double_loss)
Example #31
 def _to_dense(self):
     return linalg_ops.eye(num_rows=self.domain_dimension_dynamic(),
                           batch_shape=self.batch_shape_dynamic(),
                           dtype=self.dtype)
Example #32
 def testShapeInferenceNoBatch(self):
     self.assertEqual((2, 2), linalg_ops.eye(num_rows=2).shape)
     self.assertEqual((2, 3),
                      linalg_ops.eye(num_rows=2, num_columns=3).shape)
Example #33
    def _compute_power_iter(self,
                            var,
                            mat_g,
                            mat_g_size,
                            alpha,
                            mat_h_slot_name,
                            iter_count=100,
                            epsilon=1e-6):
        """Computes mat_g^alpha, where alpha = -1/p, p a positive integer.

    We use an iterative Schur-Newton method from equation 3.2 on page 9 of:

    A Schur-Newton Method for the Matrix p-th Root and its Inverse
    by Chun-Hua Guo and Nicholas J. Higham
    SIAM Journal on Matrix Analysis and Applications,
    2006, Vol. 28, No. 3 : pp. 788-804
    https://pdfs.semanticscholar.org/0abe/7f77433cf5908bfe2b79aa91af881da83858.pdf

    Args:
      var: the variable we are updating.
      mat_g: the symmetric PSD matrix whose power is to be computed.
      mat_g_size: size of mat_g.
      alpha: exponent, must be -1/p for p a positive integer.
      mat_h_slot_name: name of slot to store the power, if needed.
      iter_count: Maximum number of iterations.
      epsilon: accuracy indicator, useful for early termination.

    Returns:
      mat_g^alpha
    """

        identity = linalg_ops.eye(math_ops.to_int32(mat_g_size))

        def MatPower(mat_m, p):
            """Computes mat_m^p, for p a positive integer.

      Power p is known at graph compile time, so no need for loop and cond.
      Args:
        mat_m: a square matrix
        p: a positive integer

      Returns:
        mat_m^p
      """
            assert p == int(p) and p > 0
            power = None
            while p > 0:
                if p % 2 == 1:
                    power = math_ops.matmul(
                        mat_m, power) if power is not None else mat_m
                p //= 2
                mat_m = math_ops.matmul(mat_m, mat_m)
            return power

        def IterCondition(i, mat_m, _):
            return math_ops.logical_and(
                i < iter_count,
                math_ops.reduce_max(math_ops.abs(mat_m - identity)) > epsilon)

        def IterBody(i, mat_m, mat_x):
            mat_m_i = (1 - alpha) * identity + alpha * mat_m
            return (i + 1,
                    math_ops.matmul(MatPower(mat_m_i, -1.0 / alpha),
                                    mat_m), math_ops.matmul(mat_x, mat_m_i))

        if mat_g_size == 1:
            mat_h = math_ops.pow(mat_g + self._epsilon, alpha)
        else:
            damped_mat_g = mat_g + self._epsilon * identity
            z = (1 - 1 / alpha) / (2 * linalg_ops.norm(damped_mat_g))
            # The best value for z is
            # (1 - 1/alpha) * (c_max^{-alpha} - c_min^{-alpha}) /
            #                 (c_max^{1-alpha} - c_min^{1-alpha})
            # where c_max and c_min are the largest and smallest singular values of
            # damped_mat_g.
            # The above estimate assumes that c_max > c_min * 2^p. (p = -1/alpha)
            # Can replace above line by the one below, but it is less accurate,
            # hence needs more iterations to converge.
            # z = (1 - 1/alpha) / math_ops.trace(damped_mat_g)
            # If we want the method to always converge, use z = 1 / norm(damped_mat_g)
            # or z = 1 / math_ops.trace(damped_mat_g), but these can result in many
            # extra iterations.
            _, _, mat_h = control_flow_ops.while_loop(
                IterCondition, IterBody,
                [0, damped_mat_g * z, identity * math_ops.pow(z, -alpha)])
        if mat_h_slot_name is not None:
            return state_ops.assign(self.get_slot(var, mat_h_slot_name), mat_h)
        return mat_h
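A condensed NumPy sketch of the same coupled iteration (an illustrative assumption mirroring the loop body above, not the optimizer code): the invariant mat_x^{-p} * mat_m = G is preserved, so when mat_m converges to the identity, mat_x converges to G^{-1/p}.

import numpy as np

def inverse_pth_root(mat_g, p, iter_count=100, epsilon=1e-6):
  alpha = -1.0 / p
  identity = np.eye(mat_g.shape[0])
  z = (1 - 1 / alpha) / (2 * np.linalg.norm(mat_g))
  mat_m = mat_g * z
  mat_x = identity * (z ** -alpha)
  for _ in range(iter_count):
    if np.max(np.abs(mat_m - identity)) <= epsilon:
      break
    mat_m_i = (1 - alpha) * identity + alpha * mat_m
    mat_m = np.linalg.matrix_power(mat_m_i, p) @ mat_m
    mat_x = mat_x @ mat_m_i
  return mat_x

g = np.array([[4.0, 1.0], [1.0, 3.0]])                     # symmetric positive definite
root = inverse_pth_root(g, p=2)
print(np.allclose(root @ root @ g, np.eye(2), atol=1e-4))  # root is approximately G^(-1/2)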
Example #34
File: utils.py  Project: Utsal20/poGANmon
def extract_convolution_patches(inputs,
                                filter_shape,
                                padding,
                                strides=None,
                                dilation_rate=None,
                                name=None,
                                data_format=None):
    """Extracts inputs to each output coordinate in tf.nn.convolution.

  This is a generalization of tf.extract_image_patches() to tf.nn.convolution(),
  where the number of spatial dimensions may be something other than 2.

  Assumes:
  - First dimension of inputs is batch_size
  - Convolution filter is applied to all input channels.

  Args:
    inputs: Tensor of shape [batch_size, ..spatial_image_shape..,
      ..spatial_filter_shape.., in_channels]. Inputs to tf.nn.convolution().
    filter_shape: List of ints. Shape of filter passed to tf.nn.convolution().
    padding: string. Padding method. One of "VALID", "SAME".
    strides: None or list of ints. Strides along spatial dimensions.
    dilation_rate: None or list of ints. Dilation along spatial dimensions.
    name: None or str. Name of Op.
    data_format: None or str. Format of data.

  Returns:
    Tensor of shape [batch_size, ..spatial_image_shape..,
      ..spatial_filter_shape.., in_channels]

  Raises:
    ValueError: If data_format does not put channel last.
    ValueError: If inputs and filter disagree on in_channels.
  """
    if not is_data_format_channel_last(data_format):
        raise ValueError("Channel must be last dimension.")
    with ops.name_scope(
            name, "extract_convolution_patches",
        [inputs, filter_shape, padding, strides, dilation_rate]):
        batch_size = inputs.shape.as_list()[0]
        in_channels = inputs.shape.as_list()[-1]

        # filter_shape = spatial_filter_shape + [in_channels, out_channels]
        spatial_filter_shape = filter_shape[:-2]
        if in_channels != filter_shape[-2]:
            raise ValueError(
                "inputs and filter_shape must agree on in_channels.")

        # Map each input feature to a location in the output.
        out_channels = np.prod(spatial_filter_shape) * in_channels
        filters = linalg_ops.eye(out_channels)
        filters = array_ops.reshape(
            filters,
            list(spatial_filter_shape) + [in_channels, out_channels])

        result = nn_ops.convolution(inputs,
                                    filters,
                                    padding=padding,
                                    strides=strides,
                                    dilation_rate=dilation_rate)
        spatial_output_shape = result.shape.as_list()[1:-1]
        result = array_ops.reshape(result,
                                   [batch_size or -1] + spatial_output_shape +
                                   list(spatial_filter_shape) + [in_channels])

        return result
Example #35
    def posterior_from_prior_state(self, prior_state, prior_state_var,
                                   observation, observation_model,
                                   predicted_observations, observation_noise):
        """Compute a posterior over states given an observation.

    Args:
      prior_state: Prior state mean [batch size x state dimension]
      prior_state_var: Prior state covariance [batch size x state dimension x
          state dimension]
      observation: The observed value corresponding to the predictions given
          [batch size x observation dimension]
      observation_model: The [batch size x observation dimension x model state
          dimension] Tensor indicating how a particular state is mapped to
          (pre-noise) observations for each part of the batch.
      predicted_observations: An (observation mean, observation variance) tuple
          computed based on the current state, usually the output of
          observed_from_state.
      observation_noise: A [batch size x observation dimension x observation
          dimension] or [observation dimension x observation dimension] Tensor
          with covariance matrices to use for each part of the batch (a
          two-dimensional input will be broadcast).
    Returns:
      Posterior mean and covariance (dimensions matching the first two
      arguments).

    """
        observed_mean, observed_var = predicted_observations
        residual = observation - observed_mean
        # TODO(allenl): Can more of this be done using matrix_solve_ls?
        kalman_solve_rhs = math_ops.matmul(observation_model,
                                           prior_state_var,
                                           adjoint_b=True)
        # This matrix_solve adjoint doesn't make a difference symbolically (since
        # observed_var is a covariance matrix, and should be symmetric), but
        # filtering on multivariate series is unstable without it. See
        # test_multivariate_symmetric_covariance_float64 in kalman_filter_test.py
        # for an example of the instability (fails with adjoint=False).
        kalman_gain_transposed = linalg_ops.matrix_solve(matrix=observed_var,
                                                         rhs=kalman_solve_rhs,
                                                         adjoint=True)
        posterior_state = prior_state + array_ops.squeeze(math_ops.matmul(
            kalman_gain_transposed,
            array_ops.expand_dims(residual, -1),
            adjoint_a=True),
                                                          axis=[-1])
        gain_obs = math_ops.matmul(kalman_gain_transposed,
                                   observation_model,
                                   adjoint_a=True)
        identity_extradim = linalg_ops.eye(array_ops.shape(gain_obs)[1],
                                           dtype=gain_obs.dtype)[None]
        identity_minus_factor = identity_extradim - gain_obs
        if self._simplified_posterior_covariance_computation:
            # posterior covariance =
            #   (I - kalman_gain * observation_model) * prior_state_var
            posterior_state_var = math_ops.matmul(identity_minus_factor,
                                                  prior_state_var)
        else:
            observation_noise = ops.convert_to_tensor(observation_noise)
            # A Joseph form update, which provides better numeric stability than the
            # simplified optimal Kalman gain update, at the cost of a few extra
            # operations. Joseph form updates are valid for any gain (not just the
            # optimal Kalman gain), and so are more forgiving of numerical errors in
            # computing the optimal Kalman gain.
            #
            # posterior covariance =
            #   (I - kalman_gain * observation_model) * prior_state_var
            #     * (I - kalman_gain * observation_model)^T
            #   + kalman_gain * observation_noise * kalman_gain^T
            left_multiplied_state_var = math_ops.matmul(
                identity_minus_factor, prior_state_var)
            multiplied_state_var = math_ops.matmul(identity_minus_factor,
                                                   left_multiplied_state_var,
                                                   adjoint_b=True)

            def _batch_observation_noise_update():
                return (multiplied_state_var + math_ops.matmul(
                    math_ops.matmul(kalman_gain_transposed,
                                    observation_noise,
                                    adjoint_a=True), kalman_gain_transposed))

            def _matrix_observation_noise_update():
                return (multiplied_state_var + math_ops.matmul(
                    math_utils.batch_times_matrix(
                        kalman_gain_transposed, observation_noise, adj_x=True),
                    kalman_gain_transposed))

            if observation_noise.get_shape().ndims is None:
                posterior_state_var = control_flow_ops.cond(
                    math_ops.equal(array_ops.rank(observation_noise),
                                   2), _matrix_observation_noise_update,
                    _batch_observation_noise_update)
            else:
                # If static shape information exists, it gets checked in each cond()
                # branch, so we need a special case to avoid graph-build-time
                # exceptions.
                if observation_noise.get_shape().ndims == 2:
                    posterior_state_var = _matrix_observation_noise_update()
                else:
                    posterior_state_var = _batch_observation_noise_update()
        return posterior_state, posterior_state_var
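A condensed NumPy illustration of the Joseph-form update described in the comments above (an assumption for exposition, not the production filter): with gain K, the posterior covariance (I - K H) P (I - K H)^T + K R K^T reduces to (I - K H) P exactly when K is the optimal Kalman gain.

import numpy as np

p = np.array([[2.0, 0.0], [0.0, 3.0]])   # prior state covariance
h = np.array([[1.0, 0.0]])               # observation model
r = np.array([[0.5]])                    # observation noise covariance
s = h @ p @ h.T + r                      # innovation covariance
k = p @ h.T @ np.linalg.inv(s)           # optimal Kalman gain
i_kh = np.eye(2) - k @ h
joseph = i_kh @ p @ i_kh.T + k @ r @ k.T
simplified = i_kh @ p
print(np.allclose(joseph, simplified))   # True only for the optimal gain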
Example #36
 def _forward(self, x):
   with ops.control_dependencies(self._assertions(x)):
     shape = array_ops.shape(x)
     return linalg_ops.matrix_triangular_solve(
         x, linalg_ops.eye(shape[-1], batch_shape=shape[:-2]), lower=True)
Example #37
File: ggt.py  Project: neuroph12/CNNDDDD
    def _finish(self, state):
        var_dtype = self._variables[0].dtype.base_dtype
        # Update global step.
        global_step = self._get_global_step(state)
        update_global_step = state_ops.assign_add(global_step, 1.)

        # Update the first moment estimate.
        beta1 = state.get_hyper("beta1", dtype=var_dtype)
        moment1 = self._get_moment1(state)
        flat_grad = self._get_flat_grad(state)
        # moment1_t := beta1 * moment1_{t-1} + (1 - beta1) * flat_grad_t
        update_moment1 = moment1.assign(beta1 * moment1 +
                                        (1. - beta1) * flat_grad)

        # Update the gradient buffer.
        window = state.get_hyper("window")
        grad_buffer = self._get_grad_buffer(state)
        next_grad_index = math_ops.floormod(
            math_ops.to_int32(update_global_step - 1.), window)
        # grad_buffer[(t-1) % window] := moment1_t
        update_grad_buffer = state_ops.scatter_update(grad_buffer,
                                                      next_grad_index,
                                                      update_moment1)

        # Compute the update step.
        eps = state.get_hyper("eps", dtype=var_dtype)
        svd_eps = state.get_hyper("svd_eps", dtype=var_dtype)
        sigma_eps = state.get_hyper("sigma_eps", dtype=var_dtype)
        lr = state.get_hyper("lr", dtype=var_dtype)
        denom = math_ops.sqrt(
            math_ops.minimum(
                ops.convert_to_tensor(update_global_step),
                ops.convert_to_tensor(math_ops.cast(window, dtype=var_dtype))))
        moment1_2d = array_ops.expand_dims(update_moment1, -1)

        # m = grad_buffer^T / sqrt(min(t, window))
        # m has shape [model dimension, window], where model dimension is the sum
        # of the dimensions of the flattened variables.
        m = array_ops.transpose(math_ops.divide(update_grad_buffer, denom))

        # sigma, u, _ = SVD(m^Tm + I * svd_eps)
        mm = math_ops.matmul(m, m, transpose_a=True)
        damping = math_ops.cast(linalg_ops.eye(window),
                                dtype=var_dtype) * svd_eps
        sigma, u, _ = linalg_ops.svd(mm + damping)
        sigma_sqrt = math_ops.sqrt(sigma)
        sigma_sqrt_min = math_ops.reduce_min(sigma_sqrt)

        # sigma_sqrt_inv = 1 / (\sqrt{sigma} + sigma_eps) ^ 3
        # We add sigma_eps to alleviate numerical instability.
        # Note that (m^Tm)^(-3/2) = u diag(sigma_sqrt_inv) u^T.
        sigma_sqrt_inv = math_ops.divide(
            math_ops.cast(1.0, dtype=var_dtype),
            math_ops.pow(sigma_sqrt + sigma_eps, 3))

        # In full matrix AdaGrad, the update step computes (mm^T)^(-1/2)g, where the
        # inversion of a model dimension by model dimension matrix is needed. To
        # speed up this computation we calculate the following instead:
        # m(m^Tm)^(-3/2)m^T moment1 = m u diag(sigma_sqrt_inv) u^T m^T moment1.
        new_step = array_ops.expand_dims(
            array_ops.zeros(flat_grad.get_shape(), dtype=var_dtype), -1)
        head = math_ops.matmul(
            m,
            math_ops.matmul(
                u,
                math_ops.matmul(
                    array_ops.diag(sigma_sqrt_inv),
                    math_ops.matmul(u,
                                    math_ops.matmul(m,
                                                    moment1_2d,
                                                    transpose_a=True),
                                    transpose_a=True))))

        # When inverting (mm^t)^(1/2), we also add epsilon * I regularization for
        # degenerate cases. We expand ((mm^t)^(1/2) + epsilon * I)^(-1) using
        # Woodbury's identity.
        # For full derivation please see paper at
        # https://arxiv.org/pdf/1806.02958.pdf
        tail = moment1_2d - math_ops.matmul(
            m,
            math_ops.matmul(
                u,
                math_ops.matmul(
                    array_ops.diag(
                        math_ops.divide(math_ops.cast(1.0, dtype=var_dtype),
                                        sigma)),
                    math_ops.matmul(u,
                                    math_ops.matmul(
                                        m, moment1_2d, transpose_a=True),
                                    transpose_a=True))))
        scaled_tail = math_ops.divide(tail, sigma_sqrt_min)

        update_new_step = control_flow_ops.cond(
            sigma_sqrt_min > eps, lambda: math_ops.add(head, scaled_tail),
            lambda: math_ops.add(new_step, head))

        # Update each variable.
        update_step = []
        for var in self._variables:
            dim = self.shape_dict[var.name]
            start_index = self.index_dict[var.name]
            end_index = start_index + dim
            var_update_correct_shape = array_ops.reshape(
                update_new_step[start_index:end_index], var.get_shape())
            var_updated = state_ops.assign_sub(var,
                                               lr * var_update_correct_shape)
            update_step.append(var_updated)

        return control_flow_ops.group(update_step)
Example #38
 def get_noise_transform(self):
   return linalg_ops.eye(1, dtype=self.dtype)
Example #39
 def __call__(self, shape, dtype=None, partition_info=None):
     if dtype is None:
         dtype = self.dtype
     return linalg_ops.eye(shape[0], shape[1], dtype=dtype)
Example #40
def tridiag(d, diag_value, offdiag_value):
    """d x d matrix with given value on diag, and one super/sub diag."""
    diag_mat = linalg_ops.eye(d) * (diag_value - offdiag_value)
    three_bands = array_ops.matrix_band_part(
        array_ops.fill([d, d], offdiag_value), 1, 1)
    return diag_mat + three_bands
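For example (a small NumPy sketch with d, diag_value, and offdiag_value chosen here purely for illustration), the same construction with d=4, diag_value=2., offdiag_value=1. produces the classic tridiagonal [[2,1,0,0],[1,2,1,0],[0,1,2,1],[0,0,1,2]]:

import numpy as np

def tridiag_np(d, diag_value, offdiag_value):
  diag_mat = np.eye(d) * (diag_value - offdiag_value)
  three_bands = np.tril(np.triu(np.full((d, d), offdiag_value), -1), 1)
  return diag_mat + three_bands

print(tridiag_np(4, 2.0, 1.0))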
Example #41
File: utils.py  Project: Utsal20/poGANmon
def posdef_inv(tensor, damping):
    """Computes the inverse of tensor + damping * identity."""
    identity = linalg_ops.eye(tensor.shape.as_list()[0], dtype=tensor.dtype)
    damping = math_ops.cast(damping, dtype=tensor.dtype)
    return posdef_inv_functions[POSDEF_INV_METHOD](tensor, identity, damping)
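A minimal NumPy analogue of the damped inverse (an illustrative assumption, not the K-FAC utility itself):

import numpy as np

tensor = np.array([[2.0, 0.5], [0.5, 1.0]])               # symmetric positive definite
damping = 0.1
damped = tensor + damping * np.eye(tensor.shape[0])
inverse = np.linalg.solve(damped, np.eye(tensor.shape[0]))
print(np.allclose(inverse @ damped, np.eye(2)))           # True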
Example #42
 def get_state_transition(self):
   return linalg_ops.eye(1, dtype=self.dtype)
Example #43
def _MatrixSquareRootGrad(op, grad):
    """Gradient for MatrixSquareRoot."""

    # Let A be an m x m square matrix (or batch of matrices)
    # Let R = sqrtm(A)
    # By definition, A = RR
    # Take the differential: dA = d(RR) = RdR + dRR
    # Solve the resulting Sylvester equation for dR

    # Used to find Kronecker products within the Sylvester equation
    def _KroneckerProduct(b1, b2):
        """Computes the Kronecker product of two batches of square matrices."""
        b1_shape = array_ops.shape(b1)
        b2_shape = array_ops.shape(b2)
        b1_order = b1_shape[-1]
        b2_order = b2_shape[-1]

        shape_slice_size = [math_ops.subtract(array_ops.size(b1_shape), 2)]
        shape_slice = array_ops.slice(
            b1_shape, [0], shape_slice_size)  # Same for both batches
        b1_reshape_shape = array_ops.concat(
            [shape_slice, [b1_order], [1], [b1_order], [1]], 0)
        b2_reshape_shape = array_ops.concat(
            [shape_slice, [1], [b2_order], [1], [b2_order]], 0)

        b1_reshape = array_ops.reshape(b1, b1_reshape_shape)
        b2_reshape = array_ops.reshape(b2, b2_reshape_shape)

        order_prod = b1_order * b2_order
        kprod_shape = array_ops.concat(
            [shape_slice, [order_prod], [order_prod]], 0)
        return array_ops.reshape(b1_reshape * b2_reshape, kprod_shape)

    sqrtm = op.outputs[0]  # R
    shape = array_ops.shape(sqrtm)
    order = shape[-1]  # m
    matrix_count = math_ops.reduce_prod(shape[0:-2])

    # Get batch of m x m identity matrices
    eye = linalg_ops.eye(order, dtype=sqrtm.dtype)  # m x m identity matrix
    eye_flat = array_ops.reshape(eye, [-1])
    eye_tiled = array_ops.tile(eye_flat, [matrix_count])
    eye_batch = array_ops.reshape(eye_tiled, shape)

    # The transpose of R is taken in the k1 term instead of k2 in
    # order to prevent redundant transposition of R (i.e. (R')' = R)
    sqrtm_transpose = array_ops.matrix_transpose(sqrtm)
    k1 = _KroneckerProduct(eye_batch, sqrtm_transpose)
    k2 = _KroneckerProduct(sqrtm, eye_batch)
    ksum = math_ops.add(k1, k2)

    # Vectorize dA
    shape_slice_size = [math_ops.subtract(array_ops.size(shape), 2)]
    shape_slice = array_ops.slice(shape, [0], shape_slice_size)
    shape_vec_da = array_ops.concat([shape_slice, [order * order], [1]], 0)
    vec_da = array_ops.reshape(array_ops.matrix_transpose(grad), shape_vec_da)

    # Solve for vec(dR)
    vec_dsqrtm = linalg_ops.matrix_solve(ksum, vec_da)

    # Solve for dR by inverse vectorizing vec(dR)
    dsqrtm_transpose = array_ops.reshape(vec_dsqrtm, shape)
    return array_ops.matrix_transpose(dsqrtm_transpose)
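To make the Kronecker bookkeeping concrete, a hedged NumPy sketch using the column-major vec convention (an assumption; the TensorFlow code above reaches the same linear system through row-major reshapes): the dR solving R dR + dR R = dA is vec^{-1}((I ⊗ R + R^T ⊗ I)^{-1} vec(dA)).

import numpy as np

np.random.seed(0)
m = 3
r = np.random.randn(m, m) + m * np.eye(m)    # stand-in for the square root R
da = np.random.randn(m, m)                   # stand-in for the incoming gradient dA

ksum = np.kron(np.eye(m), r) + np.kron(r.T, np.eye(m))
vec_dr = np.linalg.solve(ksum, da.flatten(order="F"))     # column-major vec(dA)
dr = vec_dr.reshape(m, m, order="F")
print(np.allclose(r @ dr + dr @ r, da))                   # dR solves the Sylvester equation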
Example #44
    def _verifyLu(self, x, output_idx_type=dtypes.int64):
        # Verify that Px = LU.
        with test_util.use_gpu():

            lu, perm = linalg_ops.lu(x, output_idx_type=output_idx_type)

            # Prepare the lower factor of shape num_rows x num_rows
            lu_shape = np.array(lu.shape.as_list())
            batch_shape = lu_shape[:-2]
            num_rows = lu_shape[-2]
            num_cols = lu_shape[-1]

            lower = array_ops.matrix_band_part(lu, -1, 0)

            if num_rows > num_cols:
                eye = linalg_ops.eye(num_rows,
                                     batch_shape=batch_shape,
                                     dtype=lower.dtype)
                lower = array_ops.concat([lower, eye[..., num_cols:]], axis=-1)
            elif num_rows < num_cols:
                lower = lower[..., :num_rows]

            # Fill the diagonal with ones.
            ones_diag = array_ops.ones(np.append(batch_shape, num_rows),
                                       dtype=lower.dtype)
            lower = array_ops.matrix_set_diag(lower, ones_diag)

            # Prepare the upper factor.
            upper = array_ops.matrix_band_part(lu, 0, -1)

            verification = math_ops.matmul(lower, upper)

            # Permute the rows of the product of the triangular factors.
            if num_rows > 0:
                # Reshape the product of the triangular factors and permutation indices
                # to a single batch dimension. This makes it easy to apply
                # invert_permutation and gather_nd ops.
                perm_reshaped = array_ops.reshape(perm, [-1, num_rows])
                verification_reshaped = array_ops.reshape(
                    verification, [-1, num_rows, num_cols])
                # Invert the permutation in each batch.
                inv_perm_reshaped = map_fn.map_fn(array_ops.invert_permutation,
                                                  perm_reshaped)
                batch_size = perm_reshaped.shape.as_list()[0]
                # Prepare the batch indices with the same shape as the permutation.
                # The corresponding batch index is paired with each of the `num_rows`
                # permutation indices.
                batch_indices = math_ops.cast(array_ops.broadcast_to(
                    math_ops.range(batch_size)[:, None], perm_reshaped.shape),
                                              dtype=output_idx_type)
                permuted_verification_reshaped = array_ops.gather_nd(
                    verification_reshaped,
                    array_ops.stack([batch_indices, inv_perm_reshaped],
                                    axis=-1))

                # Reshape the verification matrix back to the original shape.
                verification = array_ops.reshape(
                    permuted_verification_reshaped, lu_shape)

            self._verifyLuBase(x, lower, upper, perm, verification,
                               output_idx_type)
Example #45
def TriAngInvCompositeGrad(l, grad):
  num_rows = array_ops.shape(l)[-1]
  batch_shape = array_ops.shape(l)[:-2]
  l_inverse = linalg_ops.matrix_triangular_solve(
      l, linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=l.dtype))
  return _GradWithInverseL(l, l_inverse, grad)
Example #46
def _solve_interpolation(train_points, train_values, order,
                         regularization_weight):
    """Solve for interpolation coefficients.
  Computes the coefficients of the polyharmonic interpolant for the 'training'
  data defined by (train_points, train_values) using the kernel phi.
  Args:
    train_points: `[b, n, d]` interpolation centers
    train_values: `[b, n, k]` function values
    order: order of the interpolation
    regularization_weight: weight to place on smoothness regularization term
  Returns:
    w: `[b, n, k]` weights on each interpolation center
    v: `[b, d, k]` weights on each input dimension
  Raises:
    ValueError: if d or k is not fully specified.
  """

    # These dimensions are set dynamically at runtime.
    b, n, _ = array_ops.unstack(array_ops.shape(train_points), num=3)

    d = train_points.shape[-1]
    if d.value is None:
        raise ValueError('The dimensionality of the input points (d) must be '
                         'statically-inferrable.')

    k = train_values.shape[-1]
    if k.value is None:
        raise ValueError('The dimensionality of the output values (k) must be '
                         'statically-inferrable.')

    # First, rename variables so that the notation (c, f, w, v, A, B, etc.)
    # follows https://en.wikipedia.org/wiki/Polyharmonic_spline.
    # To account for python style guidelines we use
    # matrix_a for A and matrix_b for B.

    c = train_points
    f = train_values

    # Next, construct the linear system.
    with ops.name_scope('construct_linear_system'):

        matrix_a = _phi(_pairwise_squared_distance_matrix(c),
                        order)  # [b, n, n]
        if regularization_weight > 0:
            batch_identity_matrix = array_ops.expand_dims(
                linalg_ops.eye(n, dtype=c.dtype), 0)
            matrix_a += regularization_weight * batch_identity_matrix

        # Append ones to the feature values for the bias term in the linear model.
        ones = array_ops.ones_like(c[..., :1], dtype=c.dtype)
        matrix_b = array_ops.concat([c, ones], 2)  # [b, n, d + 1]

        # [b, n + d + 1, n]
        left_block = array_ops.concat(
            [matrix_a, array_ops.transpose(matrix_b, [0, 2, 1])], 1)

        num_b_cols = matrix_b.get_shape()[2]  # d + 1
        lhs_zeros = array_ops.zeros([b, num_b_cols, num_b_cols],
                                    train_points.dtype)
        right_block = array_ops.concat([matrix_b, lhs_zeros],
                                       1)  # [b, n + d + 1, d + 1]
        lhs = array_ops.concat([left_block, right_block],
                               2)  # [b, n + d + 1, n + d + 1]

        rhs_zeros = array_ops.zeros([b, d + 1, k], train_points.dtype)
        rhs = array_ops.concat([f, rhs_zeros], 1)  # [b, n + d + 1, k]

    # Then, solve the linear system and unpack the results.
    with ops.name_scope('solve_linear_system'):
        w_v = linalg_ops.matrix_solve(lhs, rhs)
        w = w_v[:, :n, :]
        v = w_v[:, n:, :]

    return w, v
Example #47
    def test_inv_update_thunks(self):
        """Ensures inverse update ops run once per global_step."""
        with self._graph.as_default(), self.test_session() as sess:
            fisher_estimator = estimator.FisherEstimatorRoundRobin(
                variables=[self.weights],
                layer_collection=self.layer_collection,
                damping=0.2,
                cov_ema_decay=0.0)

            # Construct op that updates one inverse per global step.
            global_step = training_util.get_or_create_global_step()
            (cov_variable_thunks, _, inv_variable_thunks, inv_update_op_thunks
             ) = fisher_estimator.create_ops_and_vars_thunks()
            for thunk in cov_variable_thunks:
                thunk()
            for thunk in inv_variable_thunks:
                thunk()
            inv_matrices = [
                matrix
                for fisher_factor in self.layer_collection.get_factors() for
                matrix in fisher_factor._matpower_by_exp_and_damping.values()
            ]
            inv_update_op = control_flow_ops.case([
                (math_ops.equal(global_step, i), thunk)
                for i, thunk in enumerate(inv_update_op_thunks)
            ])
            increment_global_step = global_step.assign_add(1)

            sess.run(variables.global_variables_initializer())
            initial_inv_values = sess.run(inv_matrices)

            # Ensure there's one update per inverse matrix. This is true as long as
            # there's no fan-in/fan-out or parameter re-use.
            self.assertEqual(len(inv_matrices), len(inv_update_op_thunks))

            # Test is a no-op if there is only 1 inverse matrix.
            assert len(inv_matrices) > 1

            # Assign each covariance matrix a value other than the identity. This
            # ensures that the inverse matrices are updated to something different as
            # well.
            cov_matrices = [
                fisher_factor.get_cov()
                for fisher_factor in self.layer_collection.get_factors()
            ]
            sess.run([
                cov_matrix.assign(2 * linalg_ops.eye(int(cov_matrix.shape[0])))
                for cov_matrix in cov_matrices
            ])

            for i in range(len(inv_matrices)):
                # Compare new and old inverse values
                new_inv_values = sess.run(inv_matrices)
                is_inv_equal = [
                    np.allclose(initial_inv_value, new_inv_value)
                    for (initial_inv_value, new_inv_value
                         ) in zip(initial_inv_values, new_inv_values)
                ]
                num_inv_equal = sum(is_inv_equal)

                # Ensure exactly one inverse matrix changes per step.
                self.assertEqual(num_inv_equal, len(inv_matrices) - i)

                # Run all inverse update ops.
                sess.run(inv_update_op)
                sess.run(increment_global_step)
Example #48
  def __init__(self,
               input_rows,
               input_cols,
               n_components,
               unobserved_weight=0.1,
               regularization=None,
               row_init="random",
               col_init="random",
               num_row_shards=1,
               num_col_shards=1,
               row_weights=1,
               col_weights=1,
               use_factors_weights_cache=True,
               use_gramian_cache=True,
               use_scoped_vars=False):
    """Creates model for WALS matrix factorization.

    Args:
      input_rows: total number of rows for input matrix.
      input_cols: total number of cols for input matrix.
      n_components: number of dimensions to use for the factors.
      unobserved_weight: weight given to unobserved entries of matrix.
      regularization: weight of L2 regularization term. If None, no
        regularization is done.
      row_init: initializer for row factor. Can be a tensor or numpy constant.
        If set to "random", the value is initialized randomly.
      col_init: initializer for column factor. See row_init for details.
      num_row_shards: number of shards to use for row factors.
      num_col_shards: number of shards to use for column factors.
      row_weights: Must be in one of the following three formats: None, a list
        of lists of non-negative real numbers (or equivalent iterables) or a
        single non-negative real number.
        - When set to None, w_ij = unobserved_weight, which simplifies to ALS.
        Note that col_weights must also be set to "None" in this case.
        - If it is a list of lists of non-negative real numbers, it needs to be
        in the form of [[w_0, w_1, ...], [w_k, ... ], [...]], with the number of
        inner lists matching the number of row factor shards and the elements in
        each inner list are the weights for the rows of the corresponding row
        factor shard. In this case,  w_ij = unobserved_weight +
                                            row_weights[i] * col_weights[j].
        - If this is a single non-negative real number, this value is used for
        all row weights and \\(w_ij\\) = unobserved_weight + row_weights *
                                   col_weights[j].
        Note that it is allowed to have row_weights as a list while col_weights
        a single number or vice versa.
      col_weights: See row_weights.
      use_factors_weights_cache: When True, the factors and weights will be
        cached on the workers before the updates start. Defaults to True. Note
        that the weights cache is initialized through `worker_init`, and the
        row/col factors cache is initialized through
        `initialize_{col/row}_update_op`. In the case where the weights are
        computed outside and set before the training iterations start, it is
        important to ensure the `worker_init` op is run afterwards for the
        weights cache to take effect.
      use_gramian_cache: When True, the Gramians will be cached on the workers
        before the updates start. Defaults to True.
      use_scoped_vars: When True, the factor and weight vars will also be nested
        in a tf.name_scope.
    """
    self._input_rows = input_rows
    self._input_cols = input_cols
    self._num_row_shards = num_row_shards
    self._num_col_shards = num_col_shards
    self._n_components = n_components
    self._unobserved_weight = unobserved_weight
    self._regularization = regularization
    self._regularization_matrix = (
        regularization * linalg_ops.eye(self._n_components)
        if regularization is not None else None)
    assert (row_weights is None) == (col_weights is None)
    self._use_factors_weights_cache = use_factors_weights_cache
    self._use_gramian_cache = use_gramian_cache

    if use_scoped_vars:
      with ops.name_scope("row_weights"):
        self._row_weights = WALSModel._create_weights(
            row_weights, self._input_rows, self._num_row_shards, "row_weights")
      with ops.name_scope("col_weights"):
        self._col_weights = WALSModel._create_weights(
            col_weights, self._input_cols, self._num_col_shards, "col_weights")
      with ops.name_scope("row_factors"):
        self._row_factors = self._create_factors(
            self._input_rows, self._n_components, self._num_row_shards,
            row_init, "row_factors")
      with ops.name_scope("col_factors"):
        self._col_factors = self._create_factors(
            self._input_cols, self._n_components, self._num_col_shards,
            col_init, "col_factors")
    else:
      self._row_weights = WALSModel._create_weights(
          row_weights, self._input_rows, self._num_row_shards, "row_weights")
      self._col_weights = WALSModel._create_weights(
          col_weights, self._input_cols, self._num_col_shards, "col_weights")
      self._row_factors = self._create_factors(
          self._input_rows, self._n_components, self._num_row_shards, row_init,
          "row_factors")
      self._col_factors = self._create_factors(
          self._input_cols, self._n_components, self._num_col_shards, col_init,
          "col_factors")

    self._row_gramian = self._create_gramian(self._n_components, "row_gramian")
    self._col_gramian = self._create_gramian(self._n_components, "col_gramian")
    with ops.name_scope("row_prepare_gramian"):
      self._row_update_prep_gramian = self._prepare_gramian(
          self._col_factors, self._col_gramian)
    with ops.name_scope("col_prepare_gramian"):
      self._col_update_prep_gramian = self._prepare_gramian(
          self._row_factors, self._row_gramian)
    with ops.name_scope("transient_vars"):
      self._create_transient_vars()
Example #49
def posdef_inv_cholesky(tensor, reg_mat, damping):
    """Computes inverse(tensor + damping * reg_mat) with Cholesky."""
    chol = linalg_ops.cholesky(tensor + damping * reg_mat)
    identity = linalg_ops.eye(array_ops.shape(tensor)[0], dtype=tensor.dtype)
    return linalg_ops.cholesky_solve(chol, identity)