Example 1
def calculate_reshape(original_shape, new_shape, validate=False, name=None):
  """Calculates the reshaped dimensions (replacing up to one -1 in reshape)."""
  batch_shape_static = tensor_util.constant_value_as_shape(new_shape)
  if batch_shape_static.is_fully_defined():
    return np.int32(batch_shape_static.as_list()), batch_shape_static, []
  with ops.name_scope(name, "calculate_reshape", [original_shape, new_shape]):
    original_size = math_ops.reduce_prod(original_shape)
    implicit_dim = math_ops.equal(new_shape, -1)
    size_implicit_dim = (
        original_size // math_ops.maximum(1, -math_ops.reduce_prod(new_shape)))
    new_ndims = array_ops.shape(new_shape)
    expanded_new_shape = array_ops.where(  # Assumes exactly one `-1`.
        implicit_dim, array_ops.fill(new_ndims, size_implicit_dim), new_shape)
    validations = [] if not validate else [
        check_ops.assert_rank(
            original_shape, 1, message="Original shape must be a vector."),
        check_ops.assert_rank(
            new_shape, 1, message="New shape must be a vector."),
        check_ops.assert_less_equal(
            math_ops.count_nonzero(implicit_dim, dtype=dtypes.int32),
            1,
            message="At most one dimension can be unknown."),
        check_ops.assert_positive(
            expanded_new_shape, message="Shape elements must be >=-1."),
        check_ops.assert_equal(
            math_ops.reduce_prod(expanded_new_shape),
            original_size,
            message="Shape sizes do not match."),
    ]
    return expanded_new_shape, batch_shape_static, validations
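A minimal NumPy sketch (illustrative, not from the source) of the `-1` expansion arithmetic performed above: the implicit dimension is the original size divided by the product of the known entries, which is exactly `original_size // max(1, -prod(new_shape))` when exactly one entry is `-1`.

import numpy as np

# Hypothetical shapes, chosen for illustration only.
original_shape = np.array([2, 3, 4])        # 24 elements
new_shape = np.array([4, -1])               # one implicit dimension
original_size = np.prod(original_shape)
size_implicit = original_size // max(1, -np.prod(new_shape))  # 24 // 4 == 6
expanded = np.where(new_shape == -1, size_implicit, new_shape)
assert expanded.tolist() == [4, 6]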
Example 2
def _MeanGrad(op, grad):
    """Gradient for Mean."""
    sum_grad = _SumGrad(op, grad)[0]
    input_shape = array_ops.shape(op.inputs[0])
    output_shape = array_ops.shape(op.outputs[0])
    factor = _safe_shape_div(math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
    return sum_grad / math_ops.cast(factor, sum_grad.dtype), None
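A small NumPy sketch (assumed shapes, not from the source) of why the mean gradient is the sum gradient divided by `factor`: reducing a `[2, 3]` input to a `[2]` output averages 3 elements, so each input element receives one third of its output's gradient.

import numpy as np

input_shape, output_shape = (2, 3), (2,)          # mean over axis 1
factor = np.prod(input_shape) // max(np.prod(output_shape), 1)  # 3 reduced elements
grad = np.array([1., 2.])                         # upstream gradient, one per output
sum_grad = np.repeat(grad[:, None], 3, axis=1)    # what the sum gradient broadcasts
mean_grad = sum_grad / factor                     # each input gets grad / 3
assert np.allclose(mean_grad.sum(axis=1), grad)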
Example 3
  def test_docstring_example(self):
    # Produce the first 1000 members of the Halton sequence in 3 dimensions.
    num_results = 1000
    dim = 3
    with self.test_session():
      sample = halton.sample(dim, num_results=num_results, randomized=False)

      # Evaluate the integral of x_1 * x_2^2 * x_3^3  over the three dimensional
      # hypercube.
      powers = math_ops.range(1.0, limit=dim + 1)
      integral = math_ops.reduce_mean(
          math_ops.reduce_prod(sample ** powers, axis=-1))
      true_value = 1.0 / math_ops.reduce_prod(powers + 1.0)

      # Produces a relative absolute error of 1.7%.
      self.assertAllClose(integral.eval(), true_value.eval(), rtol=0.02)

      # Now skip the first 1000 samples and recompute the integral with the next
      # thousand samples. The sequence_indices argument can be used to do this.

      sequence_indices = math_ops.range(start=1000, limit=1000 + num_results,
                                        dtype=dtypes.int32)
      sample_leaped = halton.sample(dim, sequence_indices=sequence_indices,
                                    randomized=False)

      integral_leaped = math_ops.reduce_mean(
          math_ops.reduce_prod(sample_leaped ** powers, axis=-1))
      self.assertAllClose(integral_leaped.eval(), true_value.eval(), rtol=0.05)
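A plain Monte-Carlo cross-check of the same integral (pure NumPy, an illustrative assumption rather than part of the test): the integral of x_1 * x_2^2 * x_3^3 over the unit cube factorizes into 1/((1+1)(2+1)(3+1)) = 1/24.

import numpy as np

rng = np.random.default_rng(0)
samples = rng.random((100000, 3))
powers = np.arange(1., 4.)
estimate = np.mean(np.prod(samples ** powers, axis=-1))
true_value = 1. / np.prod(powers + 1.)            # 1 / 24
assert abs(estimate - true_value) / true_value < 0.05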
Example 4
def validate_init_args(
    distribution,
    batch_shape,
    validate_args,
    batch_shape_static):
  """Helper to __init__ which makes or raises assertions."""
  with ops.name_scope(name="validate_init_args",
                      values=[batch_shape] + distribution._graph_parents):  # pylint: disable=protected-access
    runtime_assertions = []

    if batch_shape.shape.ndims is not None:
      if batch_shape.shape.ndims != 1:
        raise ValueError("`batch_shape` must be a vector "
                         "(saw rank: {}).".format(
                             batch_shape.shape.ndims))
    elif validate_args:
      runtime_assertions += [
          check_ops.assert_rank(
              batch_shape,
              1,
              message="`batch_shape` must be a vector.",
              name="assert_batch_shape_is_vector"),
      ]

    batch_size_static = np.prod(batch_shape_static)
    dist_batch_size_static = (
        None if not distribution.batch_shape.is_fully_defined()
        else np.prod(distribution.batch_shape).value)

    if batch_size_static is not None and dist_batch_size_static is not None:
      if batch_size_static != dist_batch_size_static:
        raise ValueError("`batch_shape` size ({}) must match "
                         "`distribution.batch_shape` size ({}).".format(
                             batch_size_static,
                             dist_batch_size_static))
    elif validate_args:
      runtime_assertions += [
          check_ops.assert_equal(
              math_ops.reduce_prod(batch_shape),
              math_ops.reduce_prod(distribution.batch_shape_tensor()),
              message=("`batch_shape` size must match "
                       "`distributions.batch_shape` size."),
              name="assert_batch_size"),
      ]

    if batch_shape_static is not None:
      if np.any(batch_shape_static < 1):
        raise ValueError("`batch_shape` elements must be positive "
                         "(i.e., larger than zero).")
    elif validate_args:
      runtime_assertions += [
          check_ops.assert_positive(
              batch_shape,
              message=("`batch_shape` elements must be positive "
                       "(i.e., larger than zero)."),
              name="assert_batch_shape_positive")
      ]

    return runtime_assertions
Example 5
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_shape = array_ops.shape(op.inputs[0])
  output_shape = array_ops.shape(op.outputs[0])
  # TODO(apassos) remove this device hackery as eager copy to device becomes
  # more seamless.
  with ops.colocate_with(input_shape):
    factor = _safe_shape_div(
        math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
  if context.in_eager_mode():
    factor = factor._copy(device_name=sum_grad.device)  # pylint: disable=protected-access
  return sum_grad / math_ops.cast(factor, sum_grad.dtype), None
Example 6
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_size = op.inputs[0].get_shape().num_elements()
  output_size = op.outputs[0].get_shape().num_elements()
  if input_size is not None and output_size is not None:
    factor = input_size // max(output_size, 1)
    factor = constant_op.constant(factor, dtype=sum_grad.dtype)
  else:
    input_shape = array_ops.shape(op.inputs[0])
    output_shape = array_ops.shape(op.outputs[0])
    factor = _safe_shape_div(
        math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
  return sum_grad / math_ops.cast(factor, sum_grad.dtype), None
Example 7
  def sample(self, sample_shape=(), seed=None, name="sample"):
    """Generate samples of the specified shape.

    Note that a call to `sample()` without arguments will generate a single
    sample.

    Args:
      sample_shape: Rank 1 `int32` `Tensor`. Shape of the generated samples.
      seed: Python integer seed for RNG
      name: name to give to the op.

    Returns:
      samples: a `Tensor` with prepended dimensions `sample_shape`.
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[sample_shape]):
        sample_shape = ops.convert_to_tensor(sample_shape,
                                             dtype=dtypes.int32,
                                             name="sample_shape")
        total = math_ops.reduce_prod(sample_shape)
        samples = self.sample_n(total, seed)
        output_shape = array_ops.concat(0, [sample_shape, array_ops.slice(
            array_ops.shape(samples), [1], [-1])])
        output = array_ops.reshape(samples, output_shape, name=name)
        output.set_shape(tensor_util.constant_value_as_shape(
            sample_shape).concatenate(samples.get_shape()[1:]))
    return output
Example 8
 def _entropy(self):
   if (not self.distribution.is_continuous or
       not self.bijector.is_constant_jacobian):
     raise NotImplementedError("entropy is not implemented")
   # Suppose Y = g(X) where g is a diffeomorphism and X is a continuous rv. It
   # can be shown that:
   #   H[Y] = H[X] + E_X[(log o abs o det o J o g)(X)].
   # If is_constant_jacobian then:
   #   E_X[(log o abs o det o J o g)(X)] = (log o abs o det o J o g)(c)
   # where c can be anything.
   entropy = self.distribution.entropy()
   if self._is_maybe_event_override:
     # H[X] = sum_i H[X_i] if X_i are mutually independent.
     # This means that a reduce_sum is a simple rescaling.
     entropy *= math_ops.cast(math_ops.reduce_prod(self._override_event_shape),
                              dtype=entropy.dtype.base_dtype)
   if self._is_maybe_batch_override:
     new_shape = array_ops.concat([
         _ones_like(self._override_batch_shape),
         self.distribution.batch_shape_tensor()
     ], 0)
     entropy = array_ops.reshape(entropy, new_shape)
     multiples = array_ops.concat([
         self._override_batch_shape,
         _ones_like(self.distribution.batch_shape_tensor())
     ], 0)
     entropy = array_ops.tile(entropy, multiples)
   dummy = array_ops.zeros([], self.dtype)
   entropy -= self.bijector.inverse_log_det_jacobian(dummy)
   entropy.set_shape(self.batch_shape)
   return entropy
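A quick NumPy check (illustrative, not from the source) of the constant-Jacobian identity used in the comment above, for the affine bijector Y = a*X + b with X ~ N(0, 1): H[Y] = H[X] + log|a|.

import numpy as np

a, b = 2.0, -1.0                                 # the shift b does not affect entropy
h_x = 0.5 * np.log(2 * np.pi * np.e)             # differential entropy of N(0, 1)
h_y = 0.5 * np.log(2 * np.pi * np.e * a ** 2)    # entropy of N(b, a^2)
assert np.isclose(h_y, h_x + np.log(abs(a)))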
Example 9
def _flip_vector_to_matrix_dynamic(vec, batch_shape):
  """flip_vector_to_matrix with dynamic shapes."""
  # Shapes associated with batch_shape
  batch_rank = array_ops.size(batch_shape)

  # Shapes associated with vec.
  vec = ops.convert_to_tensor(vec, name="vec")
  vec_shape = array_ops.shape(vec)
  vec_rank = array_ops.rank(vec)
  vec_batch_rank = vec_rank - 1

  m = vec_batch_rank - batch_rank
  # vec_shape_left = [M1,...,Mm] or [].
  vec_shape_left = array_ops.slice(vec_shape, [0], [m])
  # If vec_shape_left = [], then condensed_shape = [1] since reduce_prod([]) = 1
  # If vec_shape_left = [M1,...,Mm], condensed_shape = [M1*...*Mm]
  condensed_shape = [math_ops.reduce_prod(vec_shape_left)]
  k = array_ops.gather(vec_shape, vec_rank - 1)
  new_shape = array_ops.concat(0, (batch_shape, [k], condensed_shape))

  def _flip_front_dims_to_back():
    # Permutation corresponding to [N1,...,Nn] + [k, M1,...,Mm]
    perm = array_ops.concat(
        0, (math_ops.range(m, vec_rank), math_ops.range(0, m)))
    return array_ops.transpose(vec, perm=perm)

  x_flipped = control_flow_ops.cond(
      math_ops.less(0, m),
      _flip_front_dims_to_back,
      lambda: array_ops.expand_dims(vec, -1))

  return array_ops.reshape(x_flipped, new_shape)
Example 10
  def _expand_sample_shape_to_vector(self, x, name):
    """Helper to `sample` which ensures input is 1D."""
    x_static_val = tensor_util.constant_value(x)
    if x_static_val is None:
      prod = math_ops.reduce_prod(x)
    else:
      prod = np.prod(x_static_val, dtype=x.dtype.as_numpy_dtype())

    ndims = x.get_shape().ndims  # != sample_ndims
    if ndims is None:
      # Maybe expand_dims.
      ndims = array_ops.rank(x)
      expanded_shape = util.pick_vector(
          math_ops.equal(ndims, 0),
          np.array([1], dtype=np.int32), array_ops.shape(x))
      x = array_ops.reshape(x, expanded_shape)
    elif ndims == 0:
      # Definitely expand_dims.
      if x_static_val is not None:
        x = ops.convert_to_tensor(
            np.array([x_static_val], dtype=x.dtype.as_numpy_dtype()),
            name=name)
      else:
        x = array_ops.reshape(x, [1])
    elif ndims != 1:
      raise ValueError("Input is neither scalar nor vector.")

    return x, prod
Example 11
 def testDegenerate(self):
   with self.test_session(use_gpu=True):
     for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
       # A large number is needed to get Eigen to die
       x = array_ops.zeros((0, 9938), dtype=dtype)
       y = math_ops.reduce_prod(x, [0])
       self.assertAllEqual(y.eval(), np.ones(9938))
Example 12
  def test_tensor_array_grad(self):
    inp = constant_op.constant(np.random.rand(3, 4, 2), dtype=dtypes.float32)
    ta = tensor_array_ops.TensorArray(dtypes.float32, size=3)
    ta = ta.unstack(inp)

    def loop_fn(i):

      def body(j, x):
        value = ta.gather([j])
        value = array_ops.gather(array_ops.reshape(value, [4, 2]), i)
        return j + 1, x + value

      _, out = control_flow_ops.while_loop(lambda j, _: j < 3, body,
                                           (0, array_ops.zeros([2])))
      out = math_ops.reduce_prod(out)
      return out, gradient_ops.gradients(out, inp)[0]

    pfor_out, pfor_out_grad = pfor_control_flow_ops.pfor(loop_fn, 4)
    # Note that tf.while_loop does not work in the setup above. So we manually
    # construct the equivalent computation of the above loops here.
    real_out = math_ops.reduce_sum(inp, axis=[0])
    real_out = math_ops.reduce_prod(real_out, axis=[1])
    # Note that gradients of real_out will accumulate the gradients across the
    # output value. Hence we do the same aggregation on pfor_out_grad.
    real_out_grad = gradient_ops.gradients(real_out, inp)[0]
    sum_pfor_out_grad = math_ops.reduce_sum(pfor_out_grad, axis=[0])

    with session.Session() as sess:
      v1, v2, v1_grad, v2_grad = sess.run(
          [pfor_out, real_out, sum_pfor_out_grad, real_out_grad])
      self.assertAllClose(v1, v2)
      self.assertAllClose(v1_grad, v2_grad)
Example 13
  def _unblockify_then_matricize(self, vec):
    """Flatten the block dimensions then reshape to a batch matrix."""
    # Suppose
    #   vec.shape = [v0, v1, v2, v3],
    #   self.block_depth = 2.
    # Then
    #   leading shape = [v0, v1]
    #   block shape = [v2, v3].
    # We will reshape vec to
    #   [v1, v2*v3, v0].

    # Un-blockify: Flatten block dimensions.  Reshape
    #   [v0, v1, v2, v3] --> [v0, v1, v2*v3].
    if vec.get_shape().is_fully_defined():
      # vec_shape = [v0, v1, v2, v3]
      vec_shape = vec.get_shape().as_list()
      # vec_leading_shape = [v0, v1]
      vec_leading_shape = vec_shape[:-self.block_depth]
      # vec_block_shape = [v2, v3]
      vec_block_shape = vec_shape[-self.block_depth:]
      # flat_shape = [v0, v1, v2*v3]
      flat_shape = vec_leading_shape + [np.prod(vec_block_shape)]
    else:
      vec_shape = array_ops.shape(vec)
      vec_leading_shape = vec_shape[:-self.block_depth]
      vec_block_shape = vec_shape[-self.block_depth:]
      flat_shape = array_ops.concat(
          (vec_leading_shape, [math_ops.reduce_prod(vec_block_shape)]), 0)
    vec_flat = array_ops.reshape(vec, flat_shape)

    # Matricize:  Reshape to batch matrix.
    #   [v0, v1, v2*v3] --> [v1, v2*v3, v0],
    # representing a shape [v1] batch of [v2*v3, v0] matrices.
    matrix = distribution_util.rotate_transpose(vec_flat, shift=-1)
    return matrix
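A NumPy analogue (hypothetical shapes, block_depth = 2) of the flatten-then-rotate above: the block dimensions are merged, then the leading axis is rotated to the back.

import numpy as np

vec = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5)  # [v0, v1, v2, v3]
flat = vec.reshape(2, 3, 4 * 5)                     # un-blockify: [v0, v1, v2*v3]
matrix = np.moveaxis(flat, 0, -1)                   # rotate: [v1, v2*v3, v0]
assert matrix.shape == (3, 20, 2)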
Example 14
def sequences_loss(logits, targets, weights, num_decoders,
	average_across_timesteps=True, average_across_batch=True,
	softmax_loss_function=None, name=None):
	"""Product of weighted cross-entropy loss for sequences of logits, batch-collapsed.

	Args:
	logits: Lists of 2D Tensors of shape [batch_size x num_decoder_symbols] of size num_decoders.
	targets: Lists of 1D batch-sized int32 Tensors of the same lengths as logits.
	weights: List of 1D batch-sized float-Tensors of the same length as logits.
	average_across_timesteps: If set, divide the returned cost by the total
	label weight.
	average_across_batch: If set, divide the returned cost by the batch size.
	softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
	to be used instead of the standard softmax (the default if this is None).
	name: Optional name for this operation, defaults to "sequence_loss".

	Returns:
	A scalar float Tensor: The products of average log-perplexities per symbol (weighted).

	Raises:
	ValueError: If len(logits) is different from len(targets) or len(weights).
	"""
	if len(targets) != len(logits) or num_decoders != len(logits):
		raise ValueError("Lengths of logits and targets must be %d, not "
			"%d, %d." % (num_decoders, len(logits), len(targets)))
	losses = []
	for i in xrange(num_decoders):
		losses.append(tf.nn.seq2seq.sequence_loss(
			logits[i], targets[i], weights[i], average_across_timesteps,
			average_across_batch, softmax_loss_function, name))
	return math_ops.reduce_prod(losses)
Example 15
def _determinant_from_sigma_chol(sigma_chol):
  det_last_dim = array_ops.rank(sigma_chol) - 2
  sigma_batch_diag = array_ops.batch_matrix_diag_part(sigma_chol)
  det = math_ops.square(math_ops.reduce_prod(
      sigma_batch_diag, reduction_indices=det_last_dim))
  det.set_shape(sigma_chol.get_shape()[:-2])
  return det
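The identity being exploited: for `sigma = L @ L.T` with `L` lower-triangular, `det(sigma) = (prod diag(L))**2`. A NumPy check (illustrative, not from the source):

import numpy as np

sigma = np.array([[4., 2.], [2., 3.]])
chol = np.linalg.cholesky(sigma)
assert np.isclose(np.linalg.det(sigma), np.prod(np.diag(chol)) ** 2)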
Example 16
 def _sample_n(self, n, seed=None):
   # Get ids as a [n, batch_size]-shaped matrix, unless batch_shape=[], in
   # which case get ids as a [n]-shaped vector.
   batch_size = (np.prod(self.batch_shape.as_list(), dtype=np.int32)
                 if self.batch_shape.is_fully_defined()
                 else math_ops.reduce_prod(self.batch_shape_tensor()))
   ids = self._mixture_distribution.sample(
       sample_shape=concat_vectors(
           [n],
           distribution_util.pick_vector(
               self.is_scalar_batch(),
               np.int32([]),
               [batch_size])),
       seed=distribution_util.gen_new_seed(
           seed, "poisson_lognormal_quadrature_compound"))
   # Stride `quadrature_size` for `batch_size` number of times.
   offset = math_ops.range(start=0,
                           limit=batch_size * self._quadrature_size,
                           delta=self._quadrature_size,
                           dtype=ids.dtype)
   ids += offset
   rate = array_ops.gather(
       array_ops.reshape(self.distribution.rate, shape=[-1]), ids)
   rate = array_ops.reshape(
       rate, shape=concat_vectors([n], self.batch_shape_tensor()))
   return random_ops.random_poisson(
       lam=rate, shape=[], dtype=self.dtype, seed=seed)
Example 17
 def _prob(self, y):
   x, ildj = self.bijector.inverse_and_inverse_log_det_jacobian(y)
   x = self._maybe_rotate_dims(x, rotate_right=True)
   prob = self.distribution.prob(x)
   if self._is_maybe_event_override:
     prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
   return math_ops.exp(ildj) * prob
Example 18
def embedding_lookup_unique(params, ids, name=None):
  """Version of embedding_lookup that avoids duplicate lookups.

  This can save communication in the case of repeated ids.
  Same interface as embedding_lookup, except that it supports multi-dimensional
  `ids`, which avoids reshaping the input/output to fit gather.

  Args:
    params: A list of tensors with the same shape and type, or a
      `PartitionedVariable`. Shape `[index, d1, d2, ...]`.
    ids: A one-dimensional `Tensor` with type `int32` or `int64` containing
      the ids to be looked up in `params`. Shape `[ids1, ids2, ...]`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same type as the tensors in `params` and dimension of
    `[ids1, ids2, d1, d2, ...]`.

  Raises:
    ValueError: If `params` is empty.
  """
  with ops.name_scope(name, "EmbeddingLookupUnique", [params, ids]):
    ids = ops.convert_to_tensor(ids)
    shape = array_ops.shape(ids)
    ids_flat = array_ops.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    unique_ids, idx = array_ops.unique(ids_flat)
    unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids)
    embeds_flat = array_ops.gather(unique_embeddings, idx)
    embed_shape = array_ops.concat(
        [shape, array_ops.shape(unique_embeddings)[1:]], 0)
    embeds = array_ops.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(
        unique_embeddings.get_shape()[1:]))
    return embeds
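A hypothetical usage sketch (values assumed, not from the source); the point is that the repeated id 2 is gathered from `params` only once before being scattered back to the `[2, 2]` ids shape.

import tensorflow as tf

params = tf.constant([[0., 0.], [1., 1.], [2., 2.]])
ids = tf.constant([[2, 2], [1, 2]])            # id 2 appears three times
embeds = embedding_lookup_unique(params, ids)  # shape [2, 2, 2]
# Internally only the unique ids [2, 1] reach embedding_lookup.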
Example 19
def embedding_lookup(params, ids, name='embedding_lookup'):
  """Provides a N dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup;
  they are then unflattened to match the original ids shape plus an extra
  trailing dimension of the size of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
      Must contain indexes into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indices in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
  with ops.name_scope(name, 'embedding_lookup', [params, ids]):
    params = ops.convert_to_tensor(params)
    ids = ops.convert_to_tensor(ids)
    shape = array_ops_.shape(ids)
    ids_flat = array_ops_.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    embeds_flat = nn.embedding_lookup(params, ids_flat, name)
    embed_shape = array_ops_.concat_v2([shape, [-1]], 0)
    embeds = array_ops_.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
    return embeds
Example 20
 def _expand_sample_shape(self, sample_shape):
   """Helper to `sample` which ensures sample_shape is 1D."""
   sample_shape_static_val = tensor_util.constant_value(sample_shape)
   ndims = sample_shape.get_shape().ndims
   if sample_shape_static_val is None:
     if ndims is None or not sample_shape.get_shape().is_fully_defined():
       ndims = array_ops.rank(sample_shape)
     expanded_shape = distribution_util.pick_vector(
         math_ops.equal(ndims, 0),
         np.array((1,), dtype=dtypes.int32.as_numpy_dtype()),
         array_ops.shape(sample_shape))
     sample_shape = array_ops.reshape(sample_shape, expanded_shape)
     total = math_ops.reduce_prod(sample_shape)  # reduce_prod([]) == 1
   else:
     if ndims is None:
       raise ValueError(
           "Shouldn't be here; ndims cannot be none when we have a "
           "tf.constant shape.")
     if ndims == 0:
       sample_shape_static_val = np.reshape(sample_shape_static_val, [1])
       sample_shape = ops.convert_to_tensor(
           sample_shape_static_val,
           dtype=dtypes.int32,
           name="sample_shape")
     total = np.prod(sample_shape_static_val,
                     dtype=dtypes.int32.as_numpy_dtype())
   return sample_shape, total
Example 21
def _TopKGrad(op, grad, _):
  """Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input of
    TopK, and the second being the gradient w.r.t. the indices (all zero).
  """
  in_shape = array_ops.shape(op.inputs[0])
  ind_shape = array_ops.shape(op.outputs[1])

  ind_lastdim = array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1)
  # Flatten indices to 2D.
  ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack([-1, ind_lastdim]))

  in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1)
  outerdim = array_ops.shape(ind_2d)[0]
  # Compute linear indices (flattened to 1D).
  ind = array_ops.reshape(ind_2d + array_ops.expand_dims(
      math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1), [-1])

  # Substitute grad to appropriate locations and fill the rest with zeros,
  # finally reshaping it to the original input shape.
  return [array_ops.reshape(
      sparse_ops.sparse_to_dense(ind,
                                 array_ops.reshape(
                                     math_ops.reduce_prod(in_shape), [1]),
                                 array_ops.reshape(grad, [-1]),
                                 validate_indices=False),
      in_shape), array_ops.zeros(
          [], dtype=dtypes.int32)]
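The linear-index arithmetic above, replayed in NumPy (assumed sizes: two rows, `in_lastdim = 5`): each row's top-k indices are offset by that row's start in the flattened tensor.

import numpy as np

ind_2d = np.array([[1, 3], [0, 4]])            # per-row top-k indices
in_lastdim = 5
offsets = np.arange(0, 2 * in_lastdim, in_lastdim)[:, None]  # [[0], [5]]
flat_ind = (ind_2d + offsets).reshape(-1)
assert flat_ind.tolist() == [1, 3, 5, 9]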
Example 22
  def per_step_batch_loss(self, features, mode, state):
    """Computes predictions, losses, and intermediate model states.

    Args:
      features: A dictionary with times, values, and (optionally) exogenous
          regressors. See `define_loss`.
      mode: The tf.estimator.ModeKeys mode to use (TRAIN, EVAL, INFER).
      state: Model-dependent state, each with size [batch size x ...]. The
          number and type will typically be fixed by the model (for example a
          mean and variance).
    Returns:
      A tuple of (loss, filtered_states, predictions)
        loss: Average loss values across the batch.
        filtered_states: For each Tensor in `state` with shape [batch size x
            ...], `filtered_states` has a Tensor with shape [batch size x window
            size x ...] with filtered state for each part of the batch and
            window.
        predictions: A dictionary with model-dependent one-step-ahead (or
            at-least-one-step-ahead with missing values) predictions, with keys
            indicating the type of prediction and values having shape [batch
            size x window size x ...]. For example state space models provide
            "mean", "covariance", and "log_likelihood".

    """
    self._check_graph_initialized()
    times = math_ops.cast(features[TrainEvalFeatures.TIMES], dtype=dtypes.int64)
    values = math_ops.cast(features[TrainEvalFeatures.VALUES], dtype=self.dtype)
    exogenous_regressors = self._process_exogenous_features(
        times=times,
        features={key: value for key, value in features.items()
                  if key not in [TrainEvalFeatures.TIMES,
                                 TrainEvalFeatures.VALUES]})
    def _batch_loss_filtering_step(step_number, current_times, state):
      """Make a prediction and update it based on data."""
      current_values = values[:, step_number, :]
      state = self._apply_exogenous_update(
          step_number=step_number, current_times=current_times, state=state,
          raw_features=features,
          embedded_exogenous_regressors=exogenous_regressors)
      predicted_state, predictions = self._prediction_step(
          current_times=current_times,
          state=state)
      filtered_state, outputs = self._filtering_step(
          current_times=current_times,
          current_values=current_values,
          state=predicted_state,
          predictions=predictions)
      return filtered_state, outputs
    state, outputs = self._state_update_loop(
        times=times, state=state, state_update_fn=_batch_loss_filtering_step,
        outputs=["loss"] + self._train_output_names)
    outputs["loss"].set_shape(times.get_shape())
    loss_sum = math_ops.reduce_sum(outputs["loss"])
    per_observation_loss = (loss_sum / math_ops.cast(
        math_ops.reduce_prod(array_ops.shape(times)), dtype=self.dtype))
    per_observation_loss += self._loss_additions(times, values, mode)
    # Since we have window-level additions to the loss, its per-step value is
    # misleading, so we avoid returning it.
    del outputs["loss"]
    return per_observation_loss, state, outputs
Example 23
 def _finish_prob_for_one_fiber(self, y, x, ildj, distribution_kwargs):
   """Finish computation of prob on one element of the inverse image."""
   x = self._maybe_rotate_dims(x, rotate_right=True)
   prob = self.distribution.prob(x, **distribution_kwargs)
   if self._is_maybe_event_override:
     prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
   return math_ops.exp(math_ops.cast(ildj, prob.dtype)) * prob
Example 24
 def _shape_tensor(self):
   # See self.shape for explanation of steps
   s_shape = array_ops.shape(self._spectrum)
   batch_shape = s_shape[:-self.block_depth]
   trailing_dims = s_shape[-self.block_depth:]
   n = math_ops.reduce_prod(trailing_dims)
   n_x_n = [n, n]
   return array_ops.concat((batch_shape, n_x_n), 0)
Example 25
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_shape = op.inputs[0]._shape_tuple()  # pylint: disable=protected-access
  output_shape = op.outputs[0]._shape_tuple()  # pylint: disable=protected-access
  if (input_shape is not None and output_shape is not None and
      None not in input_shape and None not in output_shape):
    input_size = np.prod(input_shape)
    output_size = np.prod(output_shape)
    factor = input_size // max(output_size, 1)
    factor = constant_op.constant(factor, dtype=sum_grad.dtype)
  else:
    input_shape = array_ops.shape(op.inputs[0])
    output_shape = array_ops.shape(op.outputs[0])
    factor = _safe_shape_div(
        math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
  return math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), None
Example 26
  def run_test_sample_consistent_log_prob(
      self, sess_run_fn, dist,
      num_samples=int(1e5), num_threshold=int(1e3), seed=42,
      rtol=1e-2, atol=0.):
    """Tests that sample/log_prob are consistent with each other.

    "Consistency" means that `sample` and `log_prob` correspond to the same
    distribution.

    Note: this test only verifies a necessary condition for consistency--it
    does not verify sufficiency, and hence does not prove that `sample` and
    `log_prob` truly are consistent.

    Args:
      sess_run_fn: Python `callable` taking `list`-like of `Tensor`s and
        returning a list of results after running one "step" of TensorFlow
        computation, typically set to `sess.run`.
      dist: Distribution instance or object which implements `sample`,
        `log_prob`, `event_shape_tensor` and `batch_shape_tensor`.
      num_samples: Python `int` scalar indicating the number of Monte-Carlo
        samples to draw from `dist`.
      num_threshold: Python `int` scalar indicating the number of samples a
        bucket must contain before being compared to the probability.
        Default value: 1e3; must be at least 1.
        Warning: setting this too high will cause the test to falsely pass,
        while setting it too low will cause it to falsely fail.
      seed: Python `int` indicating the seed to use when sampling from `dist`.
        In general it is not recommended to use `None` during a test as this
        increases the likelihood of spurious test failure.
      rtol: Python `float`-type indicating the admissible relative error between
        analytical and sample statistics.
      atol: Python `float`-type indicating the admissible absolute error between
        analytical and sample statistics.

    Raises:
      ValueError: if `num_threshold < 1`.
    """
    if num_threshold < 1:
      raise ValueError("num_threshold({}) must be at least 1.".format(
          num_threshold))
    # Histogram only supports vectors so we call it once per batch coordinate.
    y = dist.sample(num_samples, seed=seed)
    y = array_ops.reshape(y, shape=[num_samples, -1])
    batch_size = math_ops.reduce_prod(dist.batch_shape_tensor())
    batch_dims = array_ops.shape(dist.batch_shape_tensor())[0]
    edges_expanded_shape = 1 + array_ops.pad([-2], paddings=[[0, batch_dims]])
    for b, x in enumerate(array_ops.unstack(y, axis=1)):
      counts, edges = self.histogram(x)
      edges = array_ops.reshape(edges, edges_expanded_shape)
      probs = math_ops.exp(dist.log_prob(edges))
      probs = array_ops.reshape(probs, shape=[-1, batch_size])[:, b]

      [counts_, probs_] = sess_run_fn([counts, probs])
      valid = counts_ > num_threshold
      probs_ = probs_[valid]
      counts_ = counts_[valid]
      self.assertAllClose(probs_, counts_ / num_samples,
                          rtol=rtol, atol=atol)
Example 27
def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # The gradient can be expressed by dividing the product by each entry of the
  # input tensor, but this approach can't deal with zeros in the input.
  # Here, we avoid this problem by composing the output as a product of two
  # cumprod operations.

  input_shape = array_ops.shape(op.inputs[0])
  # Reshape reduction indices for the case where the parameter is a scalar
  reduction_indices = array_ops.reshape(op.inputs[1], [-1])

  # Expand grad to full input shape
  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)
  grad = array_ops.tile(grad, tile_scaling)

  # Pack all reduced dimensions into a single one, so we can perform the
  # cumprod ops. If the reduction dims list is empty, it defaults to float32,
  # so we need to cast here.  We put all the shape-related ops on CPU to avoid
  # copying back and forth, and since listdiff is CPU only.
  with ops.device("/cpu:0"):
    rank = array_ops.rank(op.inputs[0])
    reduction_indices = (reduction_indices + rank) % rank
    reduced = math_ops.cast(reduction_indices, dtypes.int32)
    idx = math_ops.range(0, rank)
    other, _ = array_ops.setdiff1d(idx, reduced)
    perm = array_ops.concat([reduced, other], 0)
    reduced_num = math_ops.reduce_prod(array_ops.gather(input_shape, reduced))
    other_num = math_ops.reduce_prod(array_ops.gather(input_shape, other))
  permuted = array_ops.transpose(op.inputs[0], perm)
  permuted_shape = array_ops.shape(permuted)
  reshaped = array_ops.reshape(permuted, (reduced_num, other_num))

  # Calculate product, leaving out the current entry
  left = math_ops.cumprod(reshaped, axis=0, exclusive=True)
  right = math_ops.cumprod(reshaped, axis=0, exclusive=True, reverse=True)
  # For complex inputs, the gradient is in the conjugate direction.
  y = array_ops.reshape(math_ops.conj(left) * math_ops.conj(right),
                        permuted_shape)

  # Invert the transpose and reshape operations.
  # Make sure to set the statically known shape information through a reshape.
  out = grad * array_ops.transpose(y, array_ops.invert_permutation(perm))
  return array_ops.reshape(out, input_shape), None
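The zero-safe trick in one dimension, as a NumPy sketch (illustrative, not from the source): d(prod x)/dx_i is the product of everything left of entry i times everything right of it, each side computed with an exclusive cumulative product, so no division by a possibly-zero entry is needed.

import numpy as np

x = np.array([2., 0., 3.])
left = np.cumprod(np.concatenate(([1.], x[:-1])))               # [1, 2, 0]
right = np.cumprod(np.concatenate(([1.], x[::-1][:-1])))[::-1]  # [0, 3, 1]
grad_x = left * right                 # [0, 6, 0]; finite even though prod(x) == 0
assert grad_x.tolist() == [0., 6., 0.]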
Example 28
 def _prob(self, y, bijector_kwargs=None, distribution_kwargs=None):
   bijector_kwargs = bijector_kwargs or {}
   distribution_kwargs = distribution_kwargs or {}
   x, ildj = self.bijector.inverse_and_inverse_log_det_jacobian(
       y, **bijector_kwargs)
   prob = self.distribution.prob(x, **distribution_kwargs)
   if self._override_event_shape is not None:
     prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
   return math_ops.exp(ildj) * prob
Example 29
 def testProdGradientForNegativeAxis(self):
   inputs = constant_op.constant([[1., 2.], [3., 4.]],
                                 dtype=dtypes.float32)
   outputs = math_ops.reduce_prod(inputs, -1)
   with self.cached_session():
     error = gradient_checker.compute_gradient_error(
         inputs, inputs.get_shape().as_list(),
         outputs, outputs.get_shape().as_list())
     self.assertLess(error, 1e-4)
Example 30
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_shape = array_ops.shape(op.inputs[0])
  output_shape = array_ops.shape(op.outputs[0])
  # TODO(apassos) remove this device hackery as eager copy to device becomes
  # more seamless.
  with ops.colocate_with(input_shape):
    factor = _safe_shape_div(
        math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
  if context.in_eager_mode():
    # Note that we go through numpy here just so we use the eager per-device
    # scalar cache. We know the factor is a host memory tensor because it's a
    # shape, and we also know that converting a scalar into a tensor triggers a
    # per-device cache.
    factor = factor.numpy()
    factor = constant_op.constant(factor, dtype=sum_grad.dtype)
  return sum_grad / math_ops.cast(factor, sum_grad.dtype), None
Example 31
def lu_solve(lower_upper, perm, rhs, validate_args=False, name=None):
    """Solves systems of linear eqns `A X = RHS`, given LU factorizations.

  Note: this function does not verify the implied matrix is actually invertible
  nor is this condition checked even when `validate_args=True`.

  Args:
    lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if `matmul(P,
      matmul(L, U)) = X` then `lower_upper = L + U - eye`.
    perm: `p` as returned by `tf.linalg.lu`, i.e., if `matmul(P, matmul(L, U)) =
      X` then `perm = argmax(P)`.
    rhs: Matrix-shaped float `Tensor` representing targets for which to solve;
      `A X = RHS`. To handle vector cases, use: `lu_solve(..., rhs[...,
        tf.newaxis])[..., 0]`.
    validate_args: Python `bool` indicating whether arguments should be checked
      for correctness. Note: this function does not verify the implied matrix is
        actually invertible, even when `validate_args=True`.
      Default value: `False` (i.e., don't validate arguments).
    name: Python `str` name given to ops managed by this object.
      Default value: `None` (i.e., 'lu_solve').

  Returns:
    x: The `X` in `A @ X = RHS`.

  #### Examples

  ```python
  import numpy as np
  import tensorflow as tf
  import tensorflow_probability as tfp

  x = [[[1., 2],
        [3, 4]],
       [[7, 8],
        [3, 4]]]
  inv_x = tf.linalg.lu_solve(*tf.linalg.lu(x), rhs=tf.eye(2))
  tf.assert_near(tf.matrix_inverse(x), inv_x)
  # ==> True
  ```

  """

    with ops.name_scope(name or 'lu_solve'):
        lower_upper = ops.convert_to_tensor(lower_upper,
                                            dtype_hint=dtypes.float32,
                                            name='lower_upper')
        perm = ops.convert_to_tensor(perm,
                                     dtype_hint=dtypes.int32,
                                     name='perm')
        rhs = ops.convert_to_tensor(rhs,
                                    dtype_hint=lower_upper.dtype,
                                    name='rhs')

        assertions = _lu_solve_assertions(lower_upper, perm, rhs,
                                          validate_args)
        if assertions:
            with ops.control_dependencies(assertions):
                lower_upper = array_ops.identity(lower_upper)
                perm = array_ops.identity(perm)
                rhs = array_ops.identity(rhs)

        if (rhs.shape.rank == 2 and perm.shape.rank == 1):
            # Both rhs and perm have scalar batch_shape.
            permuted_rhs = array_ops.gather(rhs, perm, axis=-2)
        else:
            # Either rhs or perm have non-scalar batch_shape or we can't determine
            # this information statically.
            rhs_shape = array_ops.shape(rhs)
            broadcast_batch_shape = array_ops.broadcast_dynamic_shape(
                rhs_shape[:-2],
                array_ops.shape(perm)[:-1])
            d, m = rhs_shape[-2], rhs_shape[-1]
            rhs_broadcast_shape = array_ops.concat(
                [broadcast_batch_shape, [d, m]], axis=0)

            # Tile out rhs.
            broadcast_rhs = array_ops.broadcast_to(rhs, rhs_broadcast_shape)
            broadcast_rhs = array_ops.reshape(broadcast_rhs, [-1, d, m])

            # Tile out perm and add batch indices.
            broadcast_perm = array_ops.broadcast_to(perm,
                                                    rhs_broadcast_shape[:-1])
            broadcast_perm = array_ops.reshape(broadcast_perm, [-1, d])
            broadcast_batch_size = math_ops.reduce_prod(broadcast_batch_shape)
            broadcast_batch_indices = array_ops.broadcast_to(
                math_ops.range(broadcast_batch_size)[:, array_ops.newaxis],
                [broadcast_batch_size, d])
            broadcast_perm = array_ops.stack(
                [broadcast_batch_indices, broadcast_perm], axis=-1)

            permuted_rhs = array_ops.gather_nd(broadcast_rhs, broadcast_perm)
            permuted_rhs = array_ops.reshape(permuted_rhs, rhs_broadcast_shape)

        lower = set_diag(
            band_part(lower_upper, num_lower=-1, num_upper=0),
            array_ops.ones(array_ops.shape(lower_upper)[:-1],
                           dtype=lower_upper.dtype))
        return triangular_solve(
            lower_upper,  # Only upper is accessed.
            triangular_solve(lower, permuted_rhs),
            lower=False)
Example 32
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A tuple `(output, next_state)`: a `BeamSearchDecoderOutput` with the beam
    scores, predicted ids and parent ids, and the new `BeamSearchDecoderState`.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  step_log_probs = nn_ops.log_softmax(logits)
  step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs

  # Calculate the continuation lengths by adding to all continuing beams.
  vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
  lengths_to_add = array_ops.one_hot(
      indices=array_ops.fill([batch_size, beam_width], end_token),
      depth=vocab_size,
      on_value=np.int64(0), off_value=np.int64(1),
      dtype=dtypes.int64)
  add_mask = math_ops.to_int64(math_ops.logical_not(previously_finished))
  lengths_to_add *= array_ops.expand_dims(add_mask, 2)
  new_prediction_lengths = (
      lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight)

  time = ops.convert_to_tensor(time, name="time")
  # During the first time step we only consider the initial beam
  scores_shape = array_ops.shape(scores)
  scores_flat = control_flow_ops.cond(
      time > 0,
      lambda: array_ops.reshape(scores, [batch_size, -1]),
      lambda: scores[:, 0])
  num_available_beam = control_flow_ops.cond(
      time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
      lambda: math_ops.reduce_prod(scores_shape[2:]))

  # Pick the next beams according to the specified successors function
  next_beam_size = math_ops.minimum(
      ops.convert_to_tensor(beam_width, dtype=dtypes.int32, name="beam_width"),
      num_available_beam)
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size)

  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1],
      name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  raw_next_word_ids = math_ops.mod(word_indices, vocab_size,
                                   name="next_beam_word_ids")
  next_word_ids = math_ops.to_int32(raw_next_word_ids)
  next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                    name="next_beam_parent_ids")

  # Append new ids to current predictions
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = math_ops.logical_or(previously_finished,
                                      math_ops.equal(next_word_ids, end_token),
                                      name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged
  # 2. Beams that are now finished (EOS predicted) remain unchanged
  # 3. Beams that are not yet finished have their length increased by 1
  lengths_to_add = math_ops.to_int64(math_ops.logical_not(next_finished))
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len += lengths_to_add

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  # pylint: disable=g-long-lambda
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)
  # pylint: enable=g-long-lambda

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
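The flatten-then-top-k selection at the heart of the step, in NumPy (assumed sizes: batch_size = 1, beam_width = 2, vocab_size = 3): flat indices decompose into a parent beam (division) and a word id (modulo).

import numpy as np

scores = np.array([[[0.1, 0.5, 0.2],
                    [0.4, 0.3, 0.9]]])          # [batch, beam, vocab]
flat = scores.reshape(1, -1)                    # merge beam and vocab axes
word_indices = np.argsort(flat, axis=-1)[:, ::-1][:, :2]  # top-2 flat indices
next_word_ids = word_indices % 3                # [[2, 1]]: vocab ids
next_beam_ids = word_indices // 3               # [[1, 0]]: parent beams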
Example 33
def sufficient_statistics(x, axes, shift=True, keep_dims=False, name=None):
    """Calculate the sufficient statistics for the mean and variance of `x`.

  These sufficient statistics are computed using the one pass algorithm on
  an input that's optionally shifted using the value of the 1st element in `x`.
  See:
  https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data

  Args:
    x: A `Tensor`.
    axes: Array of ints. Axes along which to compute mean and variance.
    shift: If true, shift the data to provide more numerically stable results.
    keep_dims: produce statistics with the same dimensionality as the input.
    name: Name used to scope the operations that compute the sufficient stats.

  Returns:
    Four `Tensor` objects of the same type as `x`:
    * the count (number of elements to average over).
    * the (possibly shifted) sum of the elements in the array.
    * the (possibly shifted) sum of squares of the elements in the array.
    * the shift by which the mean must be corrected or None if `shift` is False.
  """
    with ops.op_scope([x, axes], name, "sufficient_statistics"):
        x = ops.convert_to_tensor(x, name="x")
        x_shape = x.get_shape()
        if x_shape.is_fully_defined():
            counts = 1
            m_shape = []
            for d in xrange(x_shape.ndims):
                dim = x_shape[d].value
                if d in set(axes):
                    counts *= dim
                    dim = 1
                m_shape.append(dim)
            counts = constant_op.constant(counts, dtype=x.dtype)
        else:  # shape needs to be inferred at runtime.
            x_shape = array_ops.shape(x)
            select_axes = sparse_ops.sparse_to_dense(axes,
                                                     array_ops.shape(x_shape),
                                                     True, False)
            m_shape = math_ops.select(select_axes,
                                      array_ops.ones_like(x_shape), x_shape)
            counts = math_ops.cast(math_ops.reduce_prod(x_shape / m_shape),
                                   x.dtype,
                                   name="count")
        if shift:
            shift_value = array_ops.slice(x, array_ops.zeros_like(m_shape),
                                          m_shape)
            m_ss = math_ops.sub(x, shift_value)
            v_ss = math_ops.squared_difference(x, shift_value)
            if keep_dims:
                shift_value = array_ops.identity(shift_value, name="shift")
            else:
                shift_value = array_ops.squeeze(shift_value,
                                                squeeze_dims=axes,
                                                name="shift")
        else:  # not shift.
            m_ss = x
            v_ss = math_ops.square(x)
            shift_value = None
        m_ss = math_ops.reduce_sum(m_ss,
                                   axes,
                                   keep_dims=keep_dims,
                                   name="mean_ss")
        v_ss = math_ops.reduce_sum(v_ss,
                                   axes,
                                   keep_dims=keep_dims,
                                   name="var_ss")
    return counts, m_ss, v_ss, shift_value
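A NumPy sketch (illustrative, not from the source) of the shifted one-pass statistics computed above: subtracting the first element before summing keeps the accumulators small, and the variance is recovered exactly from the shifted sums.

import numpy as np

x = np.array([[1., 2.], [3., 4.]])
shift = x.flat[0]                     # shift by the first element
m_ss = np.sum(x - shift)              # shifted sum
v_ss = np.sum((x - shift) ** 2)       # shifted sum of squares
count = x.size
variance = v_ss / count - (m_ss / count) ** 2
assert np.isclose(variance, x.var())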
Example 34
    def _subdiv_calculate_mean_and_var(self, x, axes, keep_dims):

        with K.name_scope('moments'):
            # The dynamic range of fp16 is too limited to support the collection of
            # sufficient statistics. As a workaround we simply perform the operations
            # on 32-bit floats before converting the mean and variance back to fp16
            y = math_ops.cast(
                x, dtypes.float32) if x.dtype == dtypes.float16 else x
            replica_ctx = ds.get_replica_context()

            if replica_ctx:
                # local to me

                local_sum = math_ops.reduce_sum(y, axis=axes, keepdims=True)
                local_squared_sum = math_ops.reduce_sum(math_ops.square(y),
                                                        axis=axes,
                                                        keepdims=True)
                batch_size = math_ops.cast(
                    array_ops.shape_v2(y)[0], dtypes.float32)
                # TODO(b/163099951): batch the all-reduces once we sort out the ordering
                # issue for NCCL. We don't have a mechanism to launch NCCL in the same
                # order in each replica nowadays, so we limit NCCL to batch all-reduces.
                # get the sum of all replicas (converge all devices)
                y_sum = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM,
                                               local_sum)
                # get the sum from all replicas (converge all devices)
                y_squared_sum = replica_ctx.all_reduce(
                    reduce_util.ReduceOp.SUM, local_squared_sum)
                # get the net batch size from all devices (converge all devices)
                input_batch_size = replica_ctx.all_reduce(
                    reduce_util.ReduceOp.SUM, batch_size)

                #tf.print(replica_ctx.replica_id_in_sync_group, replica_ctx.num_replicas_in_sync, batch_size, self.aggregated_square_sum_batch, axes)
                # get the number of total params you are averaging (local)
                axes_vals = [(array_ops.shape_v2(y))[i]
                             for i in range(1, len(axes))]
                multiplier_ = math_ops.cast(math_ops.reduce_prod(axes_vals),
                                            dtypes.float32)
                multiplier = multiplier_ * input_batch_size

                # convert to mean and variance (locally)
                mean = y_sum / multiplier
                y_squared_mean = y_squared_sum / multiplier
                # var = E(x^2) - E(x)^2
                variance = y_squared_mean - math_ops.square(mean)
                net_sum = y_sum / multiplier_
                squared_mean = y_squared_sum / multiplier_

            else:
                # mean = math_ops.reduce_mean(y, axes, keepdims=True, name='mean')
                # # sample variance, not unbiased variance
                # # Note: stop_gradient does not change the gradient that gets
                # #       backpropagated to the mean from the variance calculation,
                # #       because that gradient is zero
                # variance = math_ops.reduce_mean(
                #     math_ops.squared_difference(y, array_ops.stop_gradient(mean)),
                #     axes,
                #     keepdims=True,
                #     name='variance')

                net_sum = math_ops.reduce_sum(y, axis=axes, keepdims=True)
                squared_mean = math_ops.reduce_sum(math_ops.square(y),
                                                   axis=axes,
                                                   keepdims=True)

                if self._support_zero_size_input():
                    # Keras assumes that batch dimension is the first dimension for Batch
                    # Normalization.
                    input_batch_size = array_ops.shape(y)[0]
                else:
                    input_batch_size = None

                # get the total number of params you are averaging, including batch size (local)
                axes_vals = [(array_ops.shape_v2(y))[i]
                             for i in range(1, len(axes))]
                multiplier = math_ops.cast(math_ops.reduce_prod(axes_vals),
                                           dtypes.float32)

                squared_mean = squared_mean / multiplier
                net_sum = net_sum / multiplier

                if input_batch_size is None:
                    mean, variance = nn.moments(y, axes, keep_dims=True)
                    input_batch_size = 0
                else:
                    batches_ = math_ops.cast(input_batch_size,
                                             self._param_dtype)
                    # # if you only have one replica dont worry about it
                    # # Compute true mean while keeping the dims for proper broadcasting.
                    mean = net_sum / batches_
                    variance = squared_mean / batches_ - math_ops.square(mean)

            input_batch_size = math_ops.cast(input_batch_size, dtypes.int32)
            if not keep_dims:
                mean = array_ops.squeeze(mean, axes)
                net_sum = array_ops.squeeze(net_sum, axes)
                variance = array_ops.squeeze(variance, axes)
                squared_mean = array_ops.squeeze(squared_mean, axes)
            if x.dtype == dtypes.float16:
                return (math_ops.cast(mean, dtypes.float16),
                        math_ops.cast(net_sum, dtypes.float16),
                        math_ops.cast(variance, dtypes.float16),
                        math_ops.cast(squared_mean,
                                      dtypes.float16), input_batch_size)
            else:
                return (mean, net_sum, variance, squared_mean,
                        input_batch_size)
Example 35
def _fft_size_for_grad(grad, rank):
    return _math_ops.reduce_prod(_array_ops.shape(grad)[-rank:])
Example 36
 def f(x):
     pointwise = math_ops.sin(x) * math_ops.tan(x)
     return math_ops.reduce_prod(pointwise +
                                 math_ops.reduce_sum(pointwise),
                                 axis=1)
Example 37
 def fun(x):
     return math_ops.reduce_prod(math_ops.tanh(x)**2)
Example 38
 def _determinant(self):
   reduction_indices = [-(i + 1) for i in range(self.block_depth)]
   det = math_ops.reduce_prod(
       self.spectrum, reduction_indices=reduction_indices)
   return math_ops.cast(det, self.dtype)
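The underlying fact (a NumPy check, illustrative rather than from the source): a circulant matrix has the DFT of its first column as eigenvalues, so its determinant is the product of the spectrum, which is what the reduce_prod above computes blockwise.

import numpy as np

col = np.array([4., 1., 0., 1.])                 # first column of a circulant matrix
spectrum = np.fft.fft(col)                       # eigenvalues
circ = np.array([np.roll(col, k) for k in range(4)])
assert np.isclose(np.prod(spectrum).real, np.linalg.det(circ))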
Example 39
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
    """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A new beam state.
  """

    static_batch_size = tensor_util.constant_value(batch_size)

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = nn_ops.log_softmax(logits)
    #step_log_probs",Tensor shape=(?, 10, 56136)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    #step_log_probs_masked (?, 10, 56136)
    total_probs = array_ops.expand_dims(beam_state.log_probs,
                                        2) + step_log_probs
    #total_probs (?, 10, 56136)
    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    lengths_to_add = array_ops.one_hot(
        indices=array_ops.tile(array_ops.reshape(end_token, [1, 1]),
                               [batch_size, beam_width]),
        depth=vocab_size,
        on_value=constant_op.constant(0, dtype=dtypes.int64),
        off_value=constant_op.constant(1, dtype=dtypes.int64),
        dtype=dtypes.int64)
    # lengths_to_add: shape (?, 10, 56136)
    add_mask = (1 - math_ops.to_int64(previously_finished))
    # add_mask: shape (?, 10), dtype=int64
    lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
    # lengths_to_add: shape (?, 10, 56136)
    new_prediction_lengths = (lengths_to_add +
                              array_ops.expand_dims(prediction_lengths, 2))
    # new_prediction_lengths: shape (?, 10, 56136)
    # Calculate the scores for each beam
    scores = _get_scores(log_probs=total_probs,
                         sequence_lengths=new_prediction_lengths,
                         length_penalty_weight=length_penalty_weight)
    # tf.constant pads the remaining entries with the last listed value, so
    # each mask adds dtype.min at a single vocabulary id (id 0 for `mask`,
    # id 5 for `mask2`) and adds 0 everywhere else.
    scores_mask = tf.constant([step_log_probs.dtype.min, 0],
                              dtype=dtypes.float32,
                              shape=[vocab_size],
                              name='mask')
    scores_masked = tf.add(scores, scores_mask)
    scores_mask2 = tf.constant([0, 0, 0, 0, 0, step_log_probs.dtype.min, 0],
                               dtype=dtypes.float32,
                               shape=[vocab_size],
                               name='mask2')
    scores_masked = tf.add(scores_mask2, scores_masked)

    def new_scores(scores_masked):
        scores_no_stop = tf.constant([0, 0, step_log_probs.dtype.min, 0],
                                     dtype=dtypes.float32,
                                     shape=[vocab_size],
                                     name='no_stop')
        scores = tf.add(scores_masked, scores_no_stop)
        return scores

    # Constrain the length. Note that `time < 0` never holds here, so
    # `new_scores` is effectively disabled (the commented-out condition
    # `time < 9` was the intended length constraint).
    scores = control_flow_ops.cond(
        time < 0,
        lambda: new_scores(scores_masked),
        lambda: scores_masked)

    # scores: shape (?, 10, 56136) = [batch_size, beam_width, vocab_size]
    time = ops.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_shape = array_ops.shape(scores)
    #scores_shape" shape=(3,)
    scores_to_flat_1 = array_ops.reshape(scores, [batch_size, 2, -1])
    print("scores_to_flat_1", scores_to_flat_1)
    scores_to_0 = scores[:, 0]
    scores_to_1 = scores[:, -1]
    scores_to_flat_2 = tf.concat([scores_to_0, scores_to_1], 1)
    scores_flat = control_flow_ops.cond(
        time > 0, lambda: scores_to_flat_1,
        lambda: array_ops.reshape(scores_to_flat_2, [batch_size, 2, -1]))
    num_available_beam = control_flow_ops.cond(
        time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
        lambda: math_ops.reduce_prod(scores_shape[2:]))
    #scores_flat", shape=(?, ?)
    #num_available_beam" shape=()
    # Pick the next beams according to the specified successors function
    next_beam_size = math_ops.minimum(
        ops.convert_to_tensor(beam_width,
                              dtype=dtypes.int32,
                              name="beam_width"), num_available_beam)
    #scores_t = tf.reshape(scores_flat,[batch_size,2,-1])
    ############################
    #input_words=['entrencheds01', 'entrencheds02', 'forgev01', 'forgev04', \
    #             'hitn02', 'hitn03', 'vaultn02', 'vaultn04', 'deepa03', \
    #             'deeps02', 'admitv01', 'admitv02', 'plantn01', 'plantn02',\
    #             'squaren01', 'squaren05', 'drawv05', 'drawv06', 'spellv03', \
    #             'spellv02', 'shotn02', 'shotn04', 'coachv01', 'coachv02', 'casen05',\
    #             'casen09', 'focusn01', 'focusn02', 'tasten01', 'tasten04', 'footn01', \
    #             'footv01']
    input_words = get_words()
    return_list = prior_scores(input_words)
    return_array = np.array(return_list)
    return_tensor = tf.convert_to_tensor(return_array)
    tiling = [1, 5, 1]
    prior_mask = tf.tile(tf.expand_dims(return_tensor, 1), tiling)
    prior_mask = tf.cast(prior_mask, tf.float32)
    prior_mask = array_ops.reshape(prior_mask, [batch_size, -1])
    #print ("prior_mask",prior_mask)
    scores_sum = tf.reduce_sum(scores_to_flat_1, 1)

    #print ("scores_sum_1",scores_sum)
    #def cal_scores_sum(scores_sum,prior_mask):
    #    return tf.add(scores_sum,prior_mask)
    #scores_sum = control_flow_ops.cond(
    #    time > 0,
    #    lambda: cal_scores_sum(scores_sum,prior_mask),
    #    lambda: scores_sum)
    #scores_sum=tf.add(scores_sum,prior_mask)
    #print ("scores_sum_2",scores_sum)
    ############################

    #scores_final=tf.concat([scores_sum, scores_sum],1)
    def cal_scores_indices(scores_to_0, scores_to_1):
        next_beam_scores_1, word_indices_1 = nn_ops.top_k(scores_to_0, k=5)
        print("ori next_beam_scores_1,word_indices_1", next_beam_scores_1)
        print("ori word_indices_1", word_indices_1)
        next_beam_scores_2, word_indices_2 = nn_ops.top_k(scores_to_1, k=5)
        next_beam_scores = tf.concat([next_beam_scores_1, next_beam_scores_2],
                                     1)
        word_indices = tf.concat(
            [word_indices_1, word_indices_2 + 9 * vocab_size], 1)
        return next_beam_scores, word_indices

    def cal_scores_indices_t1(scores_final, next_beam_size):
        next_beam_scores_1, word_indices_1 = nn_ops.top_k(scores_final, k=5)
        #next_beam_scores_1, word_indices_1=sample(next_beam_scores_1,word_indices_1)
        print("next_beam_scores_1", next_beam_scores_1)
        print("word_indices_1", word_indices_1)
        next_beam_scores = tf.concat([next_beam_scores_1, next_beam_scores_1],
                                     1)
        word_indices = tf.concat(
            [word_indices_1, word_indices_1 + 5 * vocab_size], 1)
        return next_beam_scores, word_indices

    next_beam_scores, word_indices = control_flow_ops.cond(
        time > 0, lambda: cal_scores_indices_t1(scores_sum, next_beam_size),
        lambda: cal_scores_indices(scores_to_0, scores_to_1))

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])
    # shape: (?, ?)
    # Pick out the probs, beam_ids, and states according to the chosen predictions

    next_beam_probs = _tensor_gather_helper(gather_indices=word_indices,
                                            gather_from=total_probs,
                                            batch_size=batch_size,
                                            range_size=beam_width * vocab_size,
                                            gather_shape=[-1],
                                            name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = math_ops.mod(word_indices,
                                     vocab_size,
                                     name="next_beam_word_ids")
    # raw_next_word_ids: shape (?, 10)
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                      name="next_beam_parent_ids")

    # Append new ids to current predictions
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(previously_finished,
                                        math_ops.equal(next_word_ids,
                                                       end_token),
                                        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged
    # 2. Beams that are now finished (EOS predicted) remain unchanged
    # 3. Beams that are not yet finished have their length increased by 1
    lengths_to_add = math_ops.to_int64(
        math_ops.not_equal(next_word_ids, end_token))
    lengths_to_add = (1 - math_ops.to_int64(next_finished)) * lengths_to_add
    next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids,
                                                gather_from=beam_state.lengths,
                                                batch_size=batch_size,
                                                range_size=beam_width,
                                                gather_shape=[-1])
    next_prediction_len += lengths_to_add

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]), next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(cell_state=next_cell_state,
                                        log_probs=next_beam_probs,
                                        lengths=next_prediction_len,
                                        finished=next_finished)
    print('next_beam_probs', next_beam_probs)
    output = BeamSearchDecoderOutput(scores=next_beam_scores,
                                     predicted_ids=next_word_ids,
                                     parent_ids=next_beam_ids)

    return output, next_state
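Much of the function above is task-specific masking; the core selection step is the standard flatten-and-top-k pattern. A minimal sketch (TF2 public API; the shapes are hypothetical) of just that step:

```python
import tensorflow as tf

# Flatten [batch, beam, vocab] scores, take the top `beam_width` entries,
# then decode beam and word ids back out of the flat indices.
batch_size, beam_width, vocab_size = 2, 3, 5
scores = tf.random.normal([batch_size, beam_width, vocab_size])
scores_flat = tf.reshape(scores, [batch_size, beam_width * vocab_size])
next_scores, flat_indices = tf.math.top_k(scores_flat, k=beam_width)
next_beam_ids = flat_indices // vocab_size  # which beam each pick came from
next_word_ids = flat_indices % vocab_size   # which token each pick emits
print(next_beam_ids.shape, next_word_ids.shape)  # (2, 3) (2, 3)
```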
Esempio n. 40
0
 def test(self):
     result_lt = ops.reduce_prod(self.original_lt, {'channel'})
     golden_lt = core.LabeledTensor(
         math_ops.reduce_prod(self.original_lt.tensor, 1),
         [self.a0, self.a2, self.a3])
     self.assertLabeledTensorsEqual(result_lt, golden_lt)
Esempio n. 41
0
    def _sample_n(self, n, seed=None):
        with ops.control_dependencies(self._assertions):
            n = ops.convert_to_tensor(n, name="n")
            static_n = tensor_util.constant_value(n)
            n = int(static_n) if static_n is not None else n
            cat_samples = self.cat.sample(n, seed=seed)

            static_samples_shape = cat_samples.get_shape()
            if static_samples_shape.is_fully_defined():
                samples_shape = static_samples_shape.as_list()
                samples_size = static_samples_shape.num_elements()
            else:
                samples_shape = array_ops.shape(cat_samples)
                samples_size = array_ops.size(cat_samples)
            static_batch_shape = self.batch_shape
            if static_batch_shape.is_fully_defined():
                batch_shape = static_batch_shape.as_list()
                batch_size = static_batch_shape.num_elements()
            else:
                batch_shape = self.batch_shape_tensor()
                batch_size = math_ops.reduce_prod(batch_shape)
            static_event_shape = self.event_shape
            if static_event_shape.is_fully_defined():
                event_shape = np.array(static_event_shape.as_list(),
                                       dtype=np.int32)
            else:
                event_shape = self.event_shape_tensor()

            # Get indices into the raw cat sampling tensor. We will
            # need these to stitch sample values back out after sampling
            # within the component partitions.
            samples_raw_indices = array_ops.reshape(
                math_ops.range(0, samples_size), samples_shape)

            # Partition the raw indices so that we can use
            # dynamic_stitch later to reconstruct the samples from the
            # known partitions.
            partitioned_samples_indices = data_flow_ops.dynamic_partition(
                data=samples_raw_indices,
                partitions=cat_samples,
                num_partitions=self.num_components)

            # Copy the batch indices n times, as we will need to know
            # these to pull out the appropriate rows within the
            # component partitions.
            batch_raw_indices = array_ops.reshape(
                array_ops.tile(math_ops.range(0, batch_size), [n]),
                samples_shape)

            # Explanation of the dynamic partitioning below:
            #   batch indices are, e.g., [0, 1, 0, 1, 0, 1]
            # Suppose partitions are:
            #     [1 1 0 0 1 1]
            # After partitioning, batch indices are cut as:
            #     [batch_indices[x] for x in 2, 3]
            #     [batch_indices[x] for x in 0, 1, 4, 5]
            # i.e.
            #     [1 1] and [0 0 0 0]
            # Now we sample n=2 from part 0 and n=4 from part 1.
            # For part 0 we want samples from batch entries 1, 1 (samples 0, 1),
            # and for part 1 we want samples from batch entries 0, 0, 0, 0
            #   (samples 0, 1, 2, 3).
            partitioned_batch_indices = data_flow_ops.dynamic_partition(
                data=batch_raw_indices,
                partitions=cat_samples,
                num_partitions=self.num_components)
            samples_class = [None for _ in range(self.num_components)]

            for c in range(self.num_components):
                n_class = array_ops.size(partitioned_samples_indices[c])
                seed = distribution_util.gen_new_seed(seed, "mixture")
                samples_class_c = self.components[c].sample(n_class, seed=seed)

                # Pull out the correct batch entries from each index.
                # To do this, we may have to flatten the batch shape.

                # For sample s, batch element b of component c, we get the
                # partitioned batch indices from
                # partitioned_batch_indices[c]; and shift each element by
                # the sample index. The final lookup can be thought of as
                # a matrix gather along locations (s, b) in
                # samples_class_c where the n_class rows correspond to
                # samples within this component and the batch_size columns
                # correspond to batch elements within the component.
                #
                # Thus the lookup index is
                #   lookup[c, i] = batch_size * s[i] + b[c, i]
                # for i = 0 ... n_class[c] - 1.
                lookup_partitioned_batch_indices = (
                    batch_size * math_ops.range(n_class) +
                    partitioned_batch_indices[c])
                samples_class_c = array_ops.reshape(
                    samples_class_c,
                    array_ops.concat([[n_class * batch_size], event_shape], 0))
                samples_class_c = array_ops.gather(
                    samples_class_c,
                    lookup_partitioned_batch_indices,
                    name="samples_class_c_gather")
                samples_class[c] = samples_class_c

            # Stitch back together the samples across the components.
            lhs_flat_ret = data_flow_ops.dynamic_stitch(
                indices=partitioned_samples_indices, data=samples_class)
            # Reshape back to proper sample, batch, and event shape.
            ret = array_ops.reshape(
                lhs_flat_ret,
                array_ops.concat(
                    [samples_shape, self.event_shape_tensor()], 0))
            ret.set_shape(
                tensor_shape.TensorShape(static_samples_shape).concatenate(
                    self.event_shape))
            return ret
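The partition/stitch bookkeeping is the heart of this sampler: indices partitioned by component can be stitched back so every sample lands in its original slot. A self-contained sketch (TF2 public API; the values are made up):

```python
import tensorflow as tf

# Two components; `cat_samples` records which component each draw came from.
cat_samples = tf.constant([1, 0, 1, 0])
raw_indices = tf.range(tf.size(cat_samples))  # [0, 1, 2, 3]
parts = tf.dynamic_partition(raw_indices, cat_samples, num_partitions=2)
# parts == [[1, 3], [0, 2]]: original positions owned by each component.
per_component_samples = [tf.constant([10., 30.]),   # component 0's draws
                         tf.constant([0., 20.])]    # component 1's draws
print(tf.dynamic_stitch(parts, per_component_samples))  # [ 0. 10. 20. 30.]
```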
Esempio n. 42
0
def lu_reconstruct(lower_upper, perm, validate_args=False, name=None):
    """The reconstruct one or more matrices from their LU decomposition(s).

  Args:
    lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if `matmul(P,
      matmul(L, U)) = X` then `lower_upper = L + U - eye`.
    perm: `p` as returned by `tf.linalg.lu`, i.e., if `matmul(P, matmul(L, U)) =
      X` then `perm = argmax(P)`.
    validate_args: Python `bool` indicating whether arguments should be checked
      for correctness.
      Default value: `False` (i.e., don't validate arguments).
    name: Python `str` name given to ops managed by this object.
      Default value: `None` (i.e., 'lu_reconstruct').

  Returns:
    x: The original input to `tf.linalg.lu`, i.e., `x` as in,
      `lu_reconstruct(*tf.linalg.lu(x))`.

  #### Examples

  ```python
  import numpy as np
  import tensorflow as tf
  import tensorflow_probability as tfp

  x = [[[3., 4], [1, 2]],
       [[7., 8], [3, 4]]]
  x_reconstructed = tf.linalg.lu_reconstruct(*tf.linalg.lu(x))
  tf.assert_near(x, x_reconstructed)
  # ==> True
  ```

  """
    with ops.name_scope(name or 'lu_reconstruct'):
        lower_upper = ops.convert_to_tensor(lower_upper,
                                            dtype_hint=dtypes.float32,
                                            name='lower_upper')
        perm = ops.convert_to_tensor(perm,
                                     dtype_hint=dtypes.int32,
                                     name='perm')

        assertions = lu_reconstruct_assertions(lower_upper, perm,
                                               validate_args)
        if assertions:
            with ops.control_dependencies(assertions):
                lower_upper = array_ops.identity(lower_upper)
                perm = array_ops.identity(perm)

        shape = array_ops.shape(lower_upper)

        lower = set_diag(band_part(lower_upper, num_lower=-1, num_upper=0),
                         array_ops.ones(shape[:-1], dtype=lower_upper.dtype))
        upper = band_part(lower_upper, num_lower=0, num_upper=-1)
        x = math_ops.matmul(lower, upper)

        if (lower_upper.shape is None or lower_upper.shape.rank is None
                or lower_upper.shape.rank != 2):
            # We either don't know the batch rank or there are >0 batch dims.
            batch_size = math_ops.reduce_prod(shape[:-2])
            d = shape[-1]
            x = array_ops.reshape(x, [batch_size, d, d])
            perm = array_ops.reshape(perm, [batch_size, d])
            perm = map_fn.map_fn(array_ops.invert_permutation, perm)
            batch_indices = array_ops.broadcast_to(
                math_ops.range(batch_size)[:, array_ops.newaxis],
                [batch_size, d])
            x = array_ops.gather_nd(
                x, array_ops.stack([batch_indices, perm], axis=-1))
            x = array_ops.reshape(x, shape)
        else:
            x = array_ops.gather(x, array_ops.invert_permutation(perm))

        x.set_shape(lower_upper.shape)
        return x
Esempio n. 43
0
 def testEmptyGradients(self):
     with self.session(use_gpu=True):
         x = array_ops.zeros([0, 3])
         y = math_ops.reduce_prod(x, [1])
         error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0])
         self.assertEqual(error, 0)
Esempio n. 44
0
 def indicator(x):
     x1_times_x2 = math_ops.reduce_prod(x, axis=[-1])
     return 0.5 * (math_ops.sign(x1_times_x2) + 1.0)
Esempio n. 45
0
 def call(self, inputs, mask=None):
     # NOTE (Ngaiman Chow, 2019-10-29): modified to include the `mask` parameter.
     if not isinstance(inputs, list):
         raise ValueError(
             'A merge layer should be called on a list of inputs.')
     if self._reshape_required:
         reshaped_inputs = []
         input_ndims = list(map(K.ndim, inputs))
         if None not in input_ndims:
             # If ranks of all inputs are available,
             # we simply expand each of them at axis=1
             # until all of them have the same rank.
             max_ndim = max(input_ndims)
             for x in inputs:
                 x_ndim = K.ndim(x)
                 for _ in range(max_ndim - x_ndim):
                     x = array_ops.expand_dims(x, axis=1)
                 reshaped_inputs.append(x)
             return self._merge_function(reshaped_inputs)
         else:
             # Transpose all inputs so that batch size is the last dimension.
             # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size)
             transposed = False
             for x in inputs:
                 x_ndim = K.ndim(x)
                 if x_ndim is None:
                     x_shape = array_ops.shape(x)
                     batch_size = x_shape[0]
                     new_shape = K.concatenate([
                         x_shape[1:],
                         array_ops.expand_dims(batch_size, axis=-1)
                     ])
                     x_transposed = array_ops.reshape(
                         x,
                         array_ops.stack([
                             batch_size,
                             math_ops.reduce_prod(x_shape[1:])
                         ],
                                         axis=0))
                     x_transposed = array_ops.transpose(x_transposed,
                                                        perm=(1, 0))
                     x_transposed = array_ops.reshape(
                         x_transposed, new_shape)
                     reshaped_inputs.append(x_transposed)
                     transposed = True
                 elif x_ndim > 1:
                     dims = list(range(1, x_ndim)) + [0]
                     reshaped_inputs.append(
                         array_ops.transpose(x, perm=dims))
                     transposed = True
                 else:
                     # We don't transpose inputs if they are 1D vectors or scalars.
                     reshaped_inputs.append(x)
             y = self._merge_function(reshaped_inputs)
             y_ndim = K.ndim(y)
             if transposed:
                 # If inputs have been transposed, we have to transpose the output too.
                 if y_ndim is None:
                     y_shape = array_ops.shape(y)
                     y_ndim = array_ops.shape(y_shape)[0]
                     batch_size = y_shape[y_ndim - 1]
                     new_shape = K.concatenate([
                         array_ops.expand_dims(batch_size, axis=-1),
                         y_shape[:y_ndim - 1]
                     ])
                     y = array_ops.reshape(y, (-1, batch_size))
                     y = array_ops.transpose(y, perm=(1, 0))
                     y = array_ops.reshape(y, new_shape)
                 elif y_ndim > 1:
                     dims = [y_ndim - 1] + list(range(y_ndim - 1))
                     y = array_ops.transpose(y, perm=dims)
             return y
     else:
         return self._merge_function(inputs)
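The rank-unknown branch above moves the batch dimension to the end via a reshape/transpose/reshape round trip. A short sketch (TF2 public API, hypothetical shapes) of the same three steps:

```python
import tensorflow as tf

# Move the batch dimension of a (batch, d1, d2) tensor to the end.
x = tf.random.normal([4, 2, 3])
x_shape = tf.shape(x)
batch_size = x_shape[0]
flat = tf.reshape(x, tf.stack([batch_size, tf.reduce_prod(x_shape[1:])]))
flat_t = tf.transpose(flat, perm=(1, 0))              # (d1*d2, batch)
x_t = tf.reshape(flat_t, tf.concat([x_shape[1:], [batch_size]], axis=0))
print(x_t.shape)  # (2, 3, 4)
```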
Esempio n. 46
0
    def run_test_sample_consistent_log_prob(self,
                                            sess_run_fn,
                                            dist,
                                            num_samples=int(1e5),
                                            num_threshold=int(1e3),
                                            seed=42,
                                            batch_size=None,
                                            rtol=1e-2,
                                            atol=0.):
        """Tests that sample/log_prob are consistent with each other.

    "Consistency" means that `sample` and `log_prob` correspond to the same
    distribution.

    Note: this code only verifies a necessary condition for consistency; it
    does not verify sufficiency, and hence does not prove that `sample` and
    `log_prob` are truly consistent.

    Args:
      sess_run_fn: Python `callable` taking `list`-like of `Tensor`s and
        returning a list of results after running one "step" of TensorFlow
        computation, typically set to `sess.run`.
      dist: Distribution instance or object which implements `sample`,
        `log_prob`, `event_shape_tensor` and `batch_shape_tensor`.
      num_samples: Python `int` scalar indicating the number of Monte-Carlo
        samples to draw from `dist`.
      num_threshold: Python `int` scalar indicating the number of samples a
        bucket must contain before being compared to the probability.
        Default value: 1e3; must be at least 1.
        Warning: setting this too high can cause the check to falsely pass,
        while setting it too low can cause it to falsely fail.
      seed: Python `int` indicating the seed to use when sampling from `dist`.
        In general it is not recommended to use `None`, as this increases the
        likelihood of spurious failures.
      batch_size: Hint for unpacking result of samples. Default: `None` means
        batch_size is inferred.
      rtol: Python `float`-type indicating the admissible relative error between
        analytical and sample statistics.
      atol: Python `float`-type indicating the admissible absolute error between
        analytical and sample statistics.

    Raises:
      ValueError: if `num_threshold < 1`.
    """
        if num_threshold < 1:
            raise ValueError(
                "num_threshold({}) must be at least 1.".format(num_threshold))
        # Histogram only supports vectors so we call it once per batch coordinate.
        y = dist.sample(num_samples, seed=seed)
        y = array_ops.reshape(y, shape=[num_samples, -1])
        if batch_size is None:
            batch_size = math_ops.reduce_prod(dist.batch_shape_tensor())
        batch_dims = array_ops.shape(dist.batch_shape_tensor())[0]
        edges_expanded_shape = 1 + array_ops.pad([-2],
                                                 paddings=[[0, batch_dims]])
        for b, x in enumerate(array_ops.unstack(y, num=batch_size, axis=1)):
            counts, edges = self.histogram(x)
            edges = array_ops.reshape(edges, edges_expanded_shape)
            probs = math_ops.exp(dist.log_prob(edges))
            probs = array_ops.reshape(probs, shape=[-1, batch_size])[:, b]

            [counts_, probs_] = sess_run_fn([counts, probs])
            valid = counts_ > num_threshold
            probs_ = probs_[valid]
            counts_ = counts_[valid]
            self.assertAllClose(probs_,
                                counts_ / num_samples,
                                rtol=rtol,
                                atol=atol)
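A numpy sketch of the idea behind this check, using a standard normal in place of `dist` (all values here are illustrative): empirical bucket frequencies should approximate the probability mass implied by the density for well-populated buckets.

```python
import numpy as np

rng = np.random.default_rng(42)
samples = rng.normal(size=100_000)
counts, edges = np.histogram(samples, bins=20)
centers = 0.5 * (edges[:-1] + edges[1:])
# Probability mass per bucket: density at the center times bucket width.
probs = np.exp(-0.5 * centers ** 2) / np.sqrt(2 * np.pi) * np.diff(edges)
valid = counts > 1_000  # only compare well-populated buckets
print(np.allclose(counts[valid] / samples.size, probs[valid], rtol=0.1))
```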
Esempio n. 47
0
 def tensors_to_item(self, keys_to_tensors):
   item = self._handler.tensors_to_item(keys_to_tensors)
   return control_flow_ops.cond(
       pred=math_ops.equal(math_ops.reduce_prod(array_ops.shape(item)), 0),
       true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors),
       false_fn=lambda: item)
Esempio n. 48
0
def fill_lower_triangular(x, validate_args=False, name="fill_lower_triangular"):
  """Creates a (batch of) lower triangular matrix from a vector of inputs.

  If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1,
  b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
  `n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`.

  Although the non-batch complexity is O(n**2), large constants and sub-optimal
  vectorization means the complexity of this function is 5x slower than zeroing
  out the upper triangular, i.e., `tf.matrix_band_part(X, -1, 0)`. This
  function becomes competitive only when several matmul/cholesky/etc ops can be
  elided in constructing the input. Example: wiring a fully connected layer as
  a covariance matrix; this function reduces the final layer by 2x and possibly
  reduces the network arch complexity considerably. In most cases it is better
  to simply build a full matrix and zero out the upper triangular elements,
  e.g., `tril = tf.matrix_band_part(full, -1, 0)`, rather than directly
  construct a lower triangular.

  Example:

  ```python
  fill_lower_triangular([1, 2, 3, 4, 5, 6])
  # Returns: [[1, 0, 0],
  #           [2, 3, 0],
  #           [4, 5, 6]]
  ```

  For comparison, a pure numpy version of this function can be found in
  `distribution_util_test.py`, function `_fill_lower_triangular`.

  Args:
    x: `Tensor` representing lower triangular elements.
    validate_args: Python `bool`, default `False`. Whether to ensure the shape
      of `x` can be mapped to a lower triangular matrix (controls non-static
      checks only).
    name: Python `str`. The name to give this op.

  Returns:
    tril: `Tensor` with lower triangular elements filled from `x`.

  Raises:
    ValueError: if `x` has a static shape which cannot be mapped to a
      lower triangular matrix.
  """
  # TODO(jvdillon): Replace this code with dedicated op when it exists.
  with ops.name_scope(name, values=[x]):
    x = ops.convert_to_tensor(x, name="x")
    if (x.get_shape().ndims is not None and
        x.get_shape()[-1].value is not None):
      d = x.get_shape()[-1].value
      # d = n(n+1)/2 implies n is:
      n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))
      d_inferred = n * (n + 1) // 2
      if d != d_inferred:
        raise ValueError("Input cannot be mapped to a lower triangular; "
                         "n*(n+1)/2 = %d != %d" % (d_inferred, d))
      final_shape = x.get_shape()[:-1].concatenate(
          tensor_shape.TensorShape([n, n]))
    else:
      d = math_ops.cast(array_ops.shape(x)[-1], dtype=dtypes.float32)
      # d = n(n+1)/2 implies n is:
      n = math_ops.cast(0.5 * (math_ops.sqrt(1. + 8. * d) - 1.),
                        dtype=dtypes.int32)
      if validate_args:
        is_valid_input_shape = check_ops.assert_equal(
            n * (n + 1) / 2, d,
            message="Input cannot be mapped to a lower triangular.")
        n = control_flow_ops.with_dependencies([is_valid_input_shape], n)
      final_shape = x.get_shape()[:-1].concatenate(
          tensor_shape.TensorShape([None, None]))

    def tril_ids(n):
      """Internal helper to create vector of linear indices into y."""
      # Build the ids statically; chose 512 because it implies 1MiB.
      if not tensor_util.is_tensor(n) and n <= 512:
        ids = np.arange(n**2, dtype=np.int32)
        rows = (ids / n).astype(np.int32)  # Implicit floor.
        # We need to stop incrementing the index when we encounter
        # upper-triangular elements. The idea here is to compute the
        # lower-right number of zeros then by "symmetry" subtract this from the
        # total number of zeros, n(n-1)/2.
        # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
        offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
        # We could also zero out when (rows < cols) == (rows < ids-n*rows).
        # mask = (ids <= (n + 1) * rows).astype(np.int32)
      else:
        ids = math_ops.range(n**2)
        rows = math_ops.cast(ids / n, dtype=dtypes.int32)
        offset = math_ops.cast(rows * (2 * n - rows - 1) / 2,
                               dtype=dtypes.int32)
      return ids - offset

    # Special-case non-batch case.
    if x.get_shape().ndims == 1:
      y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n]))
      y = array_ops.matrix_band_part(y, -1, 0)
      y.set_shape(y.get_shape().merge_with(final_shape))
      return y

    # Make ids for each batch dim.
    if (x.get_shape().ndims is not None and
        x.get_shape()[:-1].is_fully_defined()):
      batch_shape = np.asarray(x.get_shape()[:-1].as_list(), dtype=np.int32)
      m = np.prod(batch_shape).astype(np.int32)
    else:
      batch_shape = array_ops.shape(x)[:-1]
      m = math_ops.reduce_prod(array_ops.shape(x)[:-1])
    batch_ids = math_ops.range(m)

    # Assemble the tril_ids into batch,tril_id pairs.
    idx = array_ops.stack([
        array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]),
        array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1])
    ])
    idx = array_ops.transpose(idx, [1, 2, 0])

    # Gather up, reshape, and return.
    y = array_ops.reshape(x, [-1, d])
    y = array_ops.gather_nd(y, idx)
    y = array_ops.reshape(y, array_ops.concat([batch_shape, [n, n]], 0))
    y = array_ops.matrix_band_part(y, -1, 0)
    y.set_shape(y.get_shape().merge_with(final_shape))
    return y
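A quick numpy check of the `tril_ids` index arithmetic above (the `r(2n - r - 1)/2` offset derived in the comment), reproducing the docstring example:

```python
import numpy as np

n = 3
ids = np.arange(n ** 2, dtype=np.int32)
rows = (ids / n).astype(np.int32)               # implicit floor
offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
x = np.array([1, 2, 3, 4, 5, 6])
# `ids - offset` walks the lower triangle in row-major order.
print(np.tril(x[(ids - offset).reshape(n, n)]))
# [[1 0 0]
#  [2 3 0]
#  [4 5 6]]
```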
Esempio n. 49
0
def batch_index(vectors, indices, name=None):
  """Indexes into a batch of vectors.

  Args:
    vectors: An N-D Tensor.
    indices: A K-D integer Tensor, K <= N. The first K - 1 dimensions of indices
        must be broadcastable to the first N - 1 dimensions of vectors.
    name: A name for this operation (optional).

  Returns:
    An N-D Tensor comprised of one element selected from each of the vectors.

  Example usage:
    vectors = [[[1, 2, 3], [4, 5, 6]],
               [[7, 8, 9], [1, 2, 3]]]

    batch_index(vectors, 0)
    => [[1, 4],
        [7, 1]]

    batch_index(vectors, [0])
    => [[[1], [4]],
        [[7], [1]]]

    batch_index(vectors, [0, 0, 2, 2])
    => [[[1, 1, 3, 3], [4, 4, 6, 6]],
        [[7, 7, 9, 9], [1, 1, 3, 3]]]

    batch_index(vectors, [[0, 0, 2, 2], [0, 1, 2, 0]])
    => [[[1, 1, 3, 3], [4, 5, 6, 4]],
        [[7, 7, 9, 9], [1, 2, 3, 1]]]
  """
  with ops.op_scope([vectors, indices], name, "BatchIndex"):
    vectors = ops.convert_to_tensor(vectors, name="vectors")
    vectors_shape = array_ops.shape(vectors)
    vectors_rank = array_ops.size(vectors_shape)

    indices = ops.convert_to_tensor(indices, name="indices")
    indices_shape = array_ops.shape(indices)
    indices_rank = array_ops.size(indices_shape)

    # Support scalar indices.
    indices_are_scalar = None
    indices_are_scalar_tensor = math_ops.equal(0, indices_rank)
    if indices.get_shape().ndims is not None:
      indices_are_scalar = indices.get_shape().ndims == 0

    if indices_are_scalar is None:
      indices, num_selected = control_flow_ops.cond(
          indices_are_scalar_tensor,
          lambda: [array_ops.expand_dims(indices, 0),  # pylint: disable=g-long-lambda
                   array_ops.constant(1, dtype=indices_shape.dtype)],
          lambda: [indices, array_ops.gather(indices_shape, indices_rank - 1)])
    elif indices_are_scalar:
      num_selected = 1
      indices = array_ops.expand_dims(indices, 0)
    else:
      num_selected = array_ops.gather(indices_shape, indices_rank - 1)

    # The batch shape is the first N-1 dimensions of `vectors`.
    batch_shape = array_ops.slice(
        vectors_shape, [0], array_ops.pack([vectors_rank - 1]))
    batch_size = math_ops.reduce_prod(batch_shape)

    # Broadcast indices to have shape `batch_shape + [num_selected]`
    bcast_shape = array_ops.concat(0, [batch_shape, [1]])
    bcast_indices = indices + array_ops.zeros(bcast_shape, dtype=indices.dtype)

    # At this point, the first N-1 dimensions of `vectors` and
    # `bcast_indices` agree, and we're almost ready to call
    # `gather_nd`. But first we need to assign each index to a batch,
    # and we do that below by counting up to `batch_size`, repeating
    # each element `num_selected` times.
    batch_count = array_ops.tile(
        array_ops.expand_dims(math_ops.range(batch_size), 1),
        array_ops.pack([1, num_selected]))
    batch_count.set_shape([vectors.get_shape()[:-1].num_elements(),
                           indices.get_shape()[-1]])

    # Flatten the batch dimensions and gather.
    nd_indices = array_ops.concat(
        1, [array_ops.reshape(batch_count, [-1, 1]),
            array_ops.reshape(bcast_indices, [-1, 1])])
    nd_batches = array_ops.reshape(vectors, array_ops.pack([batch_size, -1]))
    ret = array_ops.gather_nd(nd_batches, nd_indices)

    # Reshape the output.
    if indices_are_scalar is None:
      ret = control_flow_ops.cond(
          indices_are_scalar_tensor,
          lambda: array_ops.reshape(ret, batch_shape),
          lambda: array_ops.reshape(  # pylint: disable=g-long-lambda
              ret,
              array_ops.concat(
                  0, [batch_shape, array_ops.expand_dims(num_selected, 0)])))
    elif indices_are_scalar:
      ret = array_ops.reshape(ret, batch_shape)
      ret.set_shape(vectors.get_shape()[:-1])
    else:
      ret = array_ops.reshape(
          ret,
          array_ops.concat(
              0, [batch_shape, array_ops.expand_dims(num_selected, 0)]))
      ret.set_shape(vectors.get_shape()[:-1]
                    .concatenate(indices.get_shape()[-1:]))
    return ret
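In current TensorFlow, the shared-indices case of this helper reduces to a plain `tf.gather` along the last axis; a minimal sketch (TF2 public API) reproducing one of the docstring examples:

```python
import tensorflow as tf

vectors = tf.constant([[[1, 2, 3], [4, 5, 6]],
                       [[7, 8, 9], [1, 2, 3]]])
# Equivalent to batch_index(vectors, [0, 0, 2, 2]) above.
print(tf.gather(vectors, [0, 0, 2, 2], axis=-1))
# [[[1 1 3 3] [4 4 6 6]]
#  [[7 7 9 9] [1 1 3 3]]]
```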
Esempio n. 50
0
 def _determinant(self):
     axis = [-(i + 1) for i in range(self.block_depth)]
     det = math_ops.reduce_prod(self.spectrum, axis=axis)
     return math_ops.cast(det, self.dtype)
Esempio n. 51
0
  def _sample_n(self, n, seed=None):
    x = self.distribution.sample(
        sample_shape=concat_vectors(
            [n],
            self.batch_shape_tensor(),
            self.event_shape_tensor()),
        seed=seed)   # shape: [n, B, e]
    x = [aff.forward(x) for aff in self.endpoint_affine]

    # Get ids as a [n, batch_size]-shaped matrix, unless batch_shape=[] then get
    # ids as a [n]-shaped vector.
    batch_size = self.batch_shape.num_elements()
    if batch_size is None:
      batch_size = math_ops.reduce_prod(self.batch_shape_tensor())
    mix_batch_size = self.mixture_distribution.batch_shape.num_elements()
    if mix_batch_size is None:
      mix_batch_size = math_ops.reduce_prod(
          self.mixture_distribution.batch_shape_tensor())
    ids = self.mixture_distribution.sample(
        sample_shape=concat_vectors(
            [n],
            distribution_util.pick_vector(
                self.is_scalar_batch(),
                np.int32([]),
                [batch_size // mix_batch_size])),
        seed=distribution_util.gen_new_seed(
            seed, "vector_diffeomixture"))
    # We need to flatten batch dims in case mixture_distribution has its own
    # batch dims.
    ids = array_ops.reshape(ids, shape=concat_vectors(
        [n],
        distribution_util.pick_vector(
            self.is_scalar_batch(),
            np.int32([]),
            np.int32([-1]))))

    # Stride `components * quadrature_size` for `batch_size` number of times.
    stride = self.grid.shape.with_rank_at_least(
        2)[-2:].num_elements()
    if stride is None:
      stride = math_ops.reduce_prod(
          array_ops.shape(self.grid)[-2:])
    offset = math_ops.range(start=0,
                            limit=batch_size * stride,
                            delta=stride,
                            dtype=ids.dtype)

    weight = array_ops.gather(
        array_ops.reshape(self.grid, shape=[-1]),
        ids + offset)
    # At this point, weight flattened all batch dims into one.
    # We also need to append a singleton to broadcast with event dims.
    if self.batch_shape.is_fully_defined():
      new_shape = [-1] + self.batch_shape.as_list() + [1]
    else:
      new_shape = array_ops.concat(
          ([-1], self.batch_shape_tensor(), [1]), axis=0)
    weight = array_ops.reshape(weight, shape=new_shape)

    if len(x) != 2:
      # We actually should have already triggered this exception. However as a
      # policy we're putting this exception wherever we exploit the bimixture
      # assumption.
      raise NotImplementedError("Currently only bimixtures are supported; "
                                "len(scale)={} is not 2.".format(len(x)))

    # Alternatively:
    # x = weight * x[0] + (1. - weight) * x[1]
    x = weight * (x[0] - x[1]) + x[1]

    return x
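The strided flat gather above packs one contiguous block of `stride` grid entries per (flattened) batch element, so entry `ids[b]` of block `b` lives at flat position `b * stride + ids[b]`. A small sketch (TF2 public API, made-up values):

```python
import tensorflow as tf

grid = tf.constant([[0., 1., 2.],
                    [10., 11., 12.]])  # batch_size=2, stride=3
ids = tf.constant([2, 0])              # per-batch selection
stride = tf.shape(grid)[-1]
offset = tf.range(start=0, limit=2 * stride, delta=stride)  # [0, 3]
print(tf.gather(tf.reshape(grid, [-1]), ids + offset))  # [ 2. 10.]
```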
Esempio n. 52
0
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div"):
    """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as are any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.  Alternatively, a `PartitionedVariable`,
        created by partitioning along dimension 0.  The total unpartitioned
        shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
        vocab size and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
        ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.


  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
    if combiner is None:
        logging.warn("The default value of combiner will change from \"mean\" "
                     "to \"sqrtn\" after 2016/11/01.")
        combiner = "mean"
    if embedding_weights is None or len(embedding_weights) < 1:
        raise ValueError("Missing embedding_weights %s." % embedding_weights)

    dtype = sparse_weights.dtype if sparse_weights is not None else None
    if isinstance(embedding_weights, variables.PartitionedVariable):
        embedding_weights = list(embedding_weights)
    embedding_weights = [
        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
    ]

    contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                                [sparse_weights])

    with ops.name_scope(name, "embedding_lookup", embedding_weights +
                        [sparse_ids, sparse_weights]) as scope:
        # Reshape higher-rank sparse ids and weights to linear segment ids.
        original_shape = sparse_ids.shape
        original_rank_dim = sparse_ids.shape.get_shape()[0]
        original_rank = (array_ops.size(original_shape)
                         if original_rank_dim.value is None else
                         original_rank_dim.value)
        sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
            math_ops.reduce_prod(
                array_ops.slice(original_shape, [0], [original_rank - 1])),
            array_ops.gather(original_shape, original_rank - 1)
        ])
        if sparse_weights is not None:
            sparse_weights = ops.SparseTensor(sparse_ids.indices,
                                              sparse_weights.values,
                                              sparse_ids.shape)

        # Prune invalid ids and weights.
        sparse_ids, sparse_weights = _prune_invalid_ids(
            sparse_ids, sparse_weights)

        # Fill in dummy values for empty features, if necessary.
        sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
            sparse_ids, default_id or 0)
        if sparse_weights is not None:
            sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
                sparse_weights, 1.0)

        result = embedding_ops.embedding_lookup_sparse(
            embedding_weights,
            sparse_ids,
            sparse_weights,
            combiner=combiner,
            partition_strategy=partition_strategy,
            name=None if default_id is None else scope)

        if default_id is None:
            # Broadcast is_row_empty to the same shape as embedding_lookup_result,
            # for use in Select.
            is_row_empty = array_ops.tile(
                array_ops.reshape(is_row_empty, [-1, 1]),
                array_ops.pack([1, array_ops.shape(result)[1]]))

            result = math_ops.select(is_row_empty,
                                     array_ops.zeros_like(result),
                                     result,
                                     name=scope)

        # Reshape back from linear ids back into higher-dimensional dense result.
        final_result = array_ops.reshape(
            result,
            array_ops.concat(0, [
                array_ops.slice(math_ops.cast(original_shape, dtypes.int32),
                                [0], [original_rank - 1]),
                array_ops.slice(array_ops.shape(result), [1], [-1])
            ]))
        final_result.set_shape(
            tensor_shape.unknown_shape(
                (original_rank_dim - 1).value).concatenate(
                    result.get_shape()[1:]))
        return final_result
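The rank-reduction step at the top of this function is easy to miss: higher-rank sparse ids are flattened to a 2-D tensor before the lookup, then the dense result is reshaped back. A sketch of the flattening (TF2 public API, made-up ids):

```python
import tensorflow as tf

# A rank-3 sparse id tensor of shape [d_0, d_1, d_2] becomes [d_0 * d_1, d_2],
# one lookup segment per (d_0, d_1) row.
sparse_ids = tf.sparse.SparseTensor(
    indices=[[0, 0, 0], [0, 1, 1], [1, 0, 0]],
    values=[3, 1, 4],
    dense_shape=[2, 2, 2])
original_shape = sparse_ids.dense_shape
flat_ids = tf.sparse.reshape(
    sparse_ids,
    [tf.reduce_prod(original_shape[:-1]), original_shape[-1]])
print(flat_ids.dense_shape)  # [4 2]
```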
Esempio n. 53
0
def _MatrixSquareRootGrad(op, grad):
    """Gradient for MatrixSquareRoot."""

    # Let A be an m x m square matrix (or batch of matrices)
    # Let R = sqrtm(A)
    # By definition, A = RR
    # Take the differential: dA = d(RR) = RdR + dRR
    # Solve the resulting Sylvester equation for dR

    # Used to find Kronecker products within the Sylvester equation
    def _KroneckerProduct(b1, b2):
        """Computes the Kronecker product of two batches of square matrices."""
        b1_shape = array_ops.shape(b1)
        b2_shape = array_ops.shape(b2)
        b1_order = b1_shape[-1]
        b2_order = b2_shape[-1]

        shape_slice_size = [math_ops.subtract(array_ops.size(b1_shape), 2)]
        shape_slice = array_ops.slice(
            b1_shape, [0], shape_slice_size)  # Same for both batches
        b1_reshape_shape = array_ops.concat(
            [shape_slice, [b1_order], [1], [b1_order], [1]], 0)
        b2_reshape_shape = array_ops.concat(
            [shape_slice, [1], [b2_order], [1], [b2_order]], 0)

        b1_reshape = array_ops.reshape(b1, b1_reshape_shape)
        b2_reshape = array_ops.reshape(b2, b2_reshape_shape)

        order_prod = b1_order * b2_order
        kprod_shape = array_ops.concat(
            [shape_slice, [order_prod], [order_prod]], 0)
        return array_ops.reshape(b1_reshape * b2_reshape, kprod_shape)

    sqrtm = op.outputs[0]  # R
    shape = array_ops.shape(sqrtm)
    order = shape[-1]  # m
    matrix_count = math_ops.reduce_prod(shape[0:-2])

    # Get batch of m x m identity matrices
    eye = linalg_ops.eye(order, dtype=sqrtm.dtype)  # m x m identity matrix
    eye_flat = array_ops.reshape(eye, [-1])
    eye_tiled = array_ops.tile(eye_flat, [matrix_count])
    eye_batch = array_ops.reshape(eye_tiled, shape)

    # The transpose of R is taken in the k1 term instead of k2 in
    # order to prevent redundant transposition of R (i.e. (R')' = R)
    sqrtm_transpose = array_ops.matrix_transpose(sqrtm)
    k1 = _KroneckerProduct(eye_batch, sqrtm_transpose)
    k2 = _KroneckerProduct(sqrtm, eye_batch)
    ksum = math_ops.add(k1, k2)

    # Vectorize dA
    shape_slice_size = [math_ops.subtract(array_ops.size(shape), 2)]
    shape_slice = array_ops.slice(shape, [0], shape_slice_size)
    shape_vec_da = array_ops.concat([shape_slice, [order * order], [1]], 0)
    vec_da = array_ops.reshape(array_ops.matrix_transpose(grad), shape_vec_da)

    # Solve for vec(dR)
    vec_dsqrtm = linalg_ops.matrix_solve(ksum, vec_da)

    # Solve for dR by inverse vectorizing vec(dR)
    dsqrtm_transpose = array_ops.reshape(vec_dsqrtm, shape)
    return array_ops.matrix_transpose(dsqrtm_transpose)
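The Kronecker construction above is the vectorized form of the Sylvester equation named in the comments: since `A = RR`, the differential satisfies `R dR + dR R = dA`. A scipy sketch (made-up data) of the same solve without the Kronecker detour:

```python
import numpy as np
from scipy.linalg import solve_sylvester, sqrtm

rng = np.random.default_rng(0)
a = rng.normal(size=(3, 3))
A = a @ a.T + 3.0 * np.eye(3)        # well-conditioned SPD matrix
R = np.real(sqrtm(A))                # R @ R == A
dA = rng.normal(size=(3, 3))
dR = solve_sylvester(R, R, dA)       # solves R @ dR + dR @ R = dA
print(np.allclose(R @ dR + dR @ R, dA))  # True
```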
Esempio n. 54
0
 def _determinant(self):
     return math_ops.reduce_prod(self._get_diag(), axis=[-1])
Esempio n. 55
0
 def _determinant(self):
     return math_ops.reduce_prod(self._diag, reduction_indices=[-1])
Esempio n. 56
0
def safe_embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights=None,
        combiner="mean",
        default_id=None,
        name="safe_embedding_lookup_sparse",
        partition_strategy=None,  # not used
        max_norm=None,
        return_trainable=False):
    """ Provides a dynamic version of `tf.nn.safe_embedding_lookup_sparse`.

  Lookup embedding results, accounting for empty features and invalid weights.

  All IDs are treated as valid, including non-positive IDs.
  Invalid weights (<= 0) are pruned from input weights, as are any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights: A single `dynamic_embedding.Variable` instance
      representing the complete embedding tensor.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the
      default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy. Currently
      `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not `None`, all embeddings are l2-normalized to max_norm before
      combining.

  Returns:
    combined_embeddings:
      A dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.
    trainable_wrap:
      A TrainableWrapper object used to fill the Optimizers `var_list`
        Only provided if `return_trainable` is True.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
    if embedding_weights is None:
        raise ValueError("Missing embedding_weights %s." % embedding_weights)

    if embedding_weights.key_dtype != sparse_ids.dtype:
        raise TypeError(
            "embedding_weights.key_dtype should be same with sparse_ids.dtype: "
            "{} vs. {}".format(embedding_weights.key_dtype, sparse_ids.dtype))

    weights_dtype = sparse_weights.dtype if sparse_weights is not None else None
    if weights_dtype and embedding_weights.value_dtype != weights_dtype:
        raise TypeError(
            "embedding_weights.value_dtype should be same with sparse_weights.dtype"
            ": {} vs. {}".format(embedding_weights.value_dtype, weights_dtype))

    scope = variable_scope.get_variable_scope()
    full_name = scope.name + "/" + name if scope.name else name
    with ops.name_scope(full_name + "/"):
        # Reshape higher-rank sparse ids and weights to linear segment ids.
        original_shape = sparse_ids.dense_shape
        original_rank_dim = tensor_shape.dimension_value(
            sparse_ids.dense_shape.get_shape()[0])
        original_rank = (array_ops.size(original_shape)
                         if original_rank_dim is None else original_rank_dim)
        sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
            math_ops.reduce_prod(
                array_ops.slice(original_shape, [0], [original_rank - 1])),
            array_ops.gather(original_shape, original_rank - 1)
        ])
        if sparse_weights is not None:
            sparse_weights = sparse_tensor.SparseTensor(
                sparse_ids.indices, sparse_weights.values,
                sparse_ids.dense_shape)

        # Prune invalid weights.
        if combiner != "sum":
            sparse_ids, sparse_weights = _prune_invalid_weights(
                sparse_ids, sparse_weights)

        # Fill in dummy values for empty features, if necessary.
        sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
            sparse_ids, default_id or 0)
        if sparse_weights is not None:
            sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
                sparse_weights, 1.0)

        result, trainable_ = embedding_lookup_sparse(
            embedding_weights,
            sparse_ids,
            sparse_weights,
            combiner=combiner,
            partition_strategy=partition_strategy,
            name=name + "/embedding_lookup_sparse",
            max_norm=max_norm,
            return_trainable=True)

        if default_id is None:
            # Broadcast is_row_empty to the same shape as embedding_lookup_result,
            # for use in Select.
            is_row_empty = array_ops.tile(
                array_ops.reshape(is_row_empty, [-1, 1]),
                array_ops.stack([1, array_ops.shape(result)[1]]))

            result = array_ops.where(is_row_empty,
                                     array_ops.zeros_like(result),
                                     result,
                                     name="where")

        # Reshape back from linear ids back into higher-dimensional dense result.
        final_result = array_ops.reshape(
            result,
            array_ops.concat([
                array_ops.slice(math_ops.cast(original_shape, dtypes.int32),
                                [0], [original_rank - 1]),
                array_ops.slice(array_ops.shape(result), [1], [-1])
            ], 0))
        final_result.set_shape(
            tensor_shape.unknown_shape(
                (tensor_shape.Dimension(original_rank_dim) -
                 1).value).concatenate(result.get_shape()[1:]))
        return (final_result, trainable_) if return_trainable else final_result
Esempio n. 57
0
def _FFTSizeForGrad(grad, rank):
    return math_ops.reduce_prod(
        array_ops.slice(array_ops.reverse(array_ops.shape(grad), (True, )),
                        (0, ), (rank, )))
Esempio n. 58
0
def bincount(arr,
             weights=None,
             minlength=None,
             maxlength=None,
             dtype=dtypes.int32,
             name=None,
             axis=None,
             binary_output=False):
    """Counts the number of occurrences of each value in an integer array.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is
  `i`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  tf.math.bincount(values) #[0 2 2 1 2 1]
  ```
  The maximum element in `values` is 5; adding 1 gives 6, which is the
  length of the output vector.

  Each bin value in the output indicates the number of occurrences of the
  corresponding index. Here, index 1 of the output has value 2, indicating
  that the value 1 occurs twice in `values`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  weights = tf.constant([1,5,0,1,0,5,4,5])
  tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5]
  ```
  Each bin is incremented by the corresponding weight instead of 1. Here,
  index 1 of the output has value 6, the sum of the weights at the positions
  where `values` equals 1.

  **Bin-counting on a certain axis**

  This example takes a 2 dimensional input and returns a `Tensor` with
  bincounting on each sample.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [2, 1, 1, 0]], dtype=int32)>


  **Bin-counting with binary_output**

  This example gives binary output instead of counting the occurrence.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1, binary_output=True)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [1, 1, 1, 0]], dtype=int32)>

  Args:
    arr: A Tensor, RaggedTensor, or SparseTensor whose values should be counted.
      These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal to or greater
      than `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0` and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.

  Returns:
    A vector with the same dtype as `weights`, or the given `dtype`, containing
    the bin values.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.

  """
    name = "bincount" if name is None else name
    with ops.name_scope(name):
        # Fast path: plain dense bincount when counts (not binary output) are
        # requested and no axis is given.
        if not binary_output and axis is None:
            arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32)
            array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
            output_size = math_ops.cast(array_is_nonempty, dtypes.int32) * (
                math_ops.reduce_max(arr) + 1)
            if minlength is not None:
                minlength = ops.convert_to_tensor(minlength,
                                                  name="minlength",
                                                  dtype=dtypes.int32)
                output_size = gen_math_ops.maximum(minlength, output_size)
            if maxlength is not None:
                maxlength = ops.convert_to_tensor(maxlength,
                                                  name="maxlength",
                                                  dtype=dtypes.int32)
                output_size = gen_math_ops.minimum(maxlength, output_size)
            if weights is not None:
                weights = ops.convert_to_tensor(weights, name="weights")
                return gen_math_ops.unsorted_segment_sum(
                    weights, arr, output_size)
            weights = constant_op.constant([], dtype)
            arr = array_ops.reshape(arr, [-1])
            return gen_math_ops.bincount(arr, output_size, weights)

        if not isinstance(arr, sparse_tensor.SparseTensor):
            arr = ragged_tensor.convert_to_tensor_or_ragged_tensor(arr,
                                                                   name="arr")
        if weights is not None:
            if not isinstance(weights, sparse_tensor.SparseTensor):
                weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
                    weights, name="weights")

        if weights is not None and binary_output:
            raise ValueError(
                "Arguments `binary_output` and `weights` are mutually "
                "exclusive. Please specify only one.")

        if not arr.dtype.is_integer:
            arr = math_ops.cast(arr, dtypes.int32)
        if axis is None:
            axis = 0

        if axis not in [0, -1]:
            raise ValueError(
                f"Unsupported value for argument axis={axis}. Only 0 and"
                " -1 are currently supported.")

        if isinstance(arr, ragged_tensor.RaggedTensor):
            array_is_nonempty = math_ops.reduce_prod(
                array_ops.shape(arr.values)) > 0
        else:
            array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
        if isinstance(arr, sparse_tensor.SparseTensor):
            output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (
                math_ops.reduce_max(arr.values) + 1)
        else:
            output_size = math_ops.cast(
                array_is_nonempty, arr.dtype) * (math_ops.reduce_max(arr) + 1)
        if minlength is not None:
            minlength = ops.convert_to_tensor(minlength,
                                              name="minlength",
                                              dtype=arr.dtype)
            output_size = gen_math_ops.maximum(minlength, output_size)
        if maxlength is not None:
            maxlength = ops.convert_to_tensor(maxlength,
                                              name="maxlength",
                                              dtype=arr.dtype)
            output_size = gen_math_ops.minimum(maxlength, output_size)

        if axis == 0:
            if isinstance(arr, sparse_tensor.SparseTensor):
                if weights is not None:
                    weights = validate_sparse_weights(arr, weights, dtype)
                arr = arr.values
            elif isinstance(arr, ragged_tensor.RaggedTensor):
                if weights is not None:
                    weights = validate_ragged_weights(arr, weights, dtype)
                arr = arr.values
            else:
                if weights is not None:
                    weights = array_ops.reshape(weights, [-1])
                arr = array_ops.reshape(arr, [-1])

        if isinstance(arr, sparse_tensor.SparseTensor):
            weights = validate_sparse_weights(arr, weights, dtype)
            return gen_math_ops.sparse_bincount(indices=arr.indices,
                                                values=arr.values,
                                                dense_shape=arr.dense_shape,
                                                size=output_size,
                                                weights=weights,
                                                binary_output=binary_output)
        elif isinstance(arr, ragged_tensor.RaggedTensor):
            weights = validate_ragged_weights(arr, weights, dtype)
            return gen_math_ops.ragged_bincount(splits=arr.row_splits,
                                                values=arr.values,
                                                size=output_size,
                                                weights=weights,
                                                binary_output=binary_output)
        else:
            weights = validate_dense_weights(arr, weights, dtype)
            return gen_math_ops.dense_bincount(input=arr,
                                               size=output_size,
                                               weights=weights,
                                               binary_output=binary_output)
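
A short usage sketch for the `minlength`/`maxlength` arguments documented
above (toy values, TF 2.x eager mode assumed):

```python
import tensorflow as tf

values = tf.constant([1, 1, 2, 3, 2, 4, 4, 5])
# minlength pads the output with trailing zeros up to the requested length.
print(tf.math.bincount(values, minlength=8).numpy())  # [0 2 2 1 2 1 0 0]
# maxlength drops values >= maxlength before counting.
print(tf.math.bincount(values, maxlength=4).numpy())  # [0 2 2 1]
```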
Example no. 59
0
    def per_step_batch_loss(self, features, mode, state):
        """Computes predictions, losses, and intermediate model states.

    Args:
      features: A dictionary with times, values, and (optionally) exogenous
          regressors. See `define_loss`.
      mode: The tf.estimator.ModeKeys mode to use (TRAIN, EVAL, INFER).
      state: Model-dependent state, each with size [batch size x ...]. The
          number and type will typically be fixed by the model (for example a
          mean and variance).
    Returns:
      A tuple of (loss, filtered_states, predictions)
        loss: Average loss values across the batch.
        filtered_states: For each Tensor in `state` with shape [batch size x
            ...], `filtered_states` has a Tensor with shape [batch size x window
            size x ...] with filtered state for each part of the batch and
            window.
        predictions: A dictionary with model-dependent one-step-ahead (or
            at-least-one-step-ahead with missing values) predictions, with keys
            indicating the type of prediction and values having shape [batch
            size x window size x ...]. For example, state space models provide
            "mean", "covariance", and "log_likelihood".

    """
        self._check_graph_initialized()
        times = math_ops.cast(features[TrainEvalFeatures.TIMES],
                              dtype=dtypes.int64)
        values = math_ops.cast(features[TrainEvalFeatures.VALUES],
                               dtype=self.dtype)
        exogenous_regressors = self._process_exogenous_features(
            times=times,
            features={
                key: value
                for key, value in features.items() if key not in
                [TrainEvalFeatures.TIMES, TrainEvalFeatures.VALUES]
            })

        def _batch_loss_filtering_step(step_number, current_times, state):
            """Make a prediction and update it based on data."""
            current_values = values[:, step_number, :]
            state = self._apply_exogenous_update(
                step_number=step_number,
                current_times=current_times,
                state=state,
                raw_features=features,
                embedded_exogenous_regressors=exogenous_regressors)
            predicted_state, predictions = self._prediction_step(
                current_times=current_times, state=state)
            filtered_state, outputs = self._filtering_step(
                current_times=current_times,
                current_values=current_values,
                state=predicted_state,
                predictions=predictions)
            return filtered_state, outputs

        state, outputs = self._state_update_loop(
            times=times,
            state=state,
            state_update_fn=_batch_loss_filtering_step,
            outputs=["loss"] + self._train_output_names)
        outputs["loss"].set_shape(times.get_shape())
        loss_sum = math_ops.reduce_sum(outputs["loss"])
        per_observation_loss = (loss_sum / math_ops.cast(
            math_ops.reduce_prod(array_ops.shape(times)), dtype=self.dtype))
        per_observation_loss += self._loss_additions(times, values, mode)
        # Since we have window-level additions to the loss, its per-step value is
        # misleading, so we avoid returning it.
        del outputs["loss"]
        return per_observation_loss, state, outputs
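
The final normalization divides the summed loss by the total number of
observations, i.e. batch size times window size. A minimal numeric sketch of
that step, with made-up per-step losses:

```python
import tensorflow as tf

# Toy [batch=2, window=2] matrix of per-step losses.
step_losses = tf.constant([[1.0, 2.0], [3.0, 4.0]])
loss_sum = tf.reduce_sum(step_losses)                     # 10.0
num_observations = tf.reduce_prod(tf.shape(step_losses))  # 2 * 2 = 4
per_observation_loss = loss_sum / tf.cast(num_observations, tf.float32)  # 2.5
```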
Example no. 60
0
    def _tf_reduce(self, x, reduction_axes, keepdims):
        return math_ops.reduce_prod(x, reduction_axes, keepdims)
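
For reference, `reduce_prod` with `keepdims=True` retains the reduced axis as
size 1, which is exactly what this helper forwards. A quick illustration with
toy input:

```python
import tensorflow as tf

x = tf.constant([[1., 2.], [3., 4.]])
print(tf.math.reduce_prod(x, axis=1, keepdims=True).numpy())
# [[ 2.]
#  [12.]]
```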