Example #1
def _SubGrad(op, grad):
    x = op.inputs[0]
    y = op.inputs[1]
    sx = array_ops.shape(x)
    sy = array_ops.shape(y)
    rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
    return (array_ops.reshape(math_ops.reduce_sum(grad, rx), sx),
            array_ops.reshape(-math_ops.reduce_sum(grad, ry), sy))
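The key step in this gradient is the reduction over broadcast dimensions before reshaping back to each input's shape. Below is a minimal NumPy sketch of the same reduction logic (toy shapes are assumptions, not taken from the op above):

import numpy as np

# z = x - y, with y broadcast along axis 0.
x = np.arange(6.0).reshape(2, 3)
y = np.array([1.0, 2.0, 3.0])
grad = np.ones((2, 3))                       # upstream gradient of z

dx = grad.reshape(x.shape)                   # x has no broadcast axes
dy = (-grad).sum(axis=0).reshape(y.shape)    # sum over the broadcast axis of y

assert dx.shape == x.shape and dy.shape == y.shape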
Example #2
def crf_unary_score(tag_indices, sequence_lengths, inputs):
  """Computes the unary scores of tag sequences.

  Args:
    tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials.
  Returns:
    unary_scores: A [batch_size] vector of unary scores.
  """
  batch_size = array_ops.shape(inputs)[0]
  max_seq_len = array_ops.shape(inputs)[1]
  num_tags = array_ops.shape(inputs)[2]

  flattened_inputs = array_ops.reshape(inputs, [-1])

  offsets = array_ops.expand_dims(
      math_ops.range(batch_size) * max_seq_len * num_tags, 1)
  offsets += array_ops.expand_dims(math_ops.range(max_seq_len) * num_tags, 0)
  flattened_tag_indices = array_ops.reshape(offsets + tag_indices, [-1])

  unary_scores = array_ops.reshape(
      array_ops.gather(flattened_inputs, flattened_tag_indices),
      [batch_size, max_seq_len])

  masks = _lengths_to_masks(sequence_lengths, array_ops.shape(tag_indices)[1])

  unary_scores = math_ops.reduce_sum(unary_scores * masks, 1)
  return unary_scores
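The offsets above linearize a [batch, time, tag] index into an index of the flattened inputs. A small NumPy sketch of the same arithmetic (toy sizes are assumptions), checked against direct fancy indexing:

import numpy as np

batch, seq_len, num_tags = 2, 3, 4
inputs = np.random.rand(batch, seq_len, num_tags)
tag_indices = np.random.randint(num_tags, size=(batch, seq_len))

offsets = (np.arange(batch)[:, None] * seq_len * num_tags +
           np.arange(seq_len)[None, :] * num_tags)
flat = inputs.reshape(-1)[(offsets + tag_indices).reshape(-1)]
unary = flat.reshape(batch, seq_len)

b, t = np.meshgrid(np.arange(batch), np.arange(seq_len), indexing='ij')
assert np.allclose(unary, inputs[b, t, tag_indices])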
Example #3
def embedding_lookup_unique(params, ids, name=None):
  """Version of embedding_lookup that avoids duplicate lookups.

  This can save communication in the case of repeated ids.
  Same interface as embedding_lookup, except that it supports multi-dimensional
  `ids`, which avoids having to reshape the input/output to fit gather.

  Args:
    params: A list of tensors with the same shape and type, or a
      `PartitionedVariable`. Shape `[index, d1, d2, ...]`.
    ids: A `Tensor` with type `int32` or `int64` containing the ids to be
      looked up in `params`. Shape `[ids1, ids2, ...]`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same type as the tensors in `params` and dimension of
    `[ids1, ids2, d1, d2, ...]`.

  Raises:
    ValueError: If `params` is empty.
  """
  with ops.name_scope(name, "EmbeddingLookupUnique", [params, ids]):
    ids = ops.convert_to_tensor(ids)
    shape = array_ops.shape(ids)
    ids_flat = array_ops.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    unique_ids, idx = array_ops.unique(ids_flat)
    unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids)
    embeds_flat = array_ops.gather(unique_embeddings, idx)
    embed_shape = array_ops.concat(
        [shape, array_ops.shape(unique_embeddings)[1:]], 0)
    embeds = array_ops.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(
        unique_embeddings.get_shape()[1:]))
    return embeds
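The deduplication works because `unique` returns, for every original id, an index into the array of unique ids; gathering the unique embeddings with that index reconstructs the full lookup. A NumPy sketch of that identity (toy table and ids are assumptions):

import numpy as np

params = np.random.rand(10, 4)            # assumed embedding table [vocab, dim]
ids = np.array([3, 7, 3, 3, 1])           # repeated ids

unique_ids, idx = np.unique(ids, return_inverse=True)
unique_embeddings = params[unique_ids]    # one lookup per distinct id
embeds = unique_embeddings[idx]           # expand back to the original order

assert np.allclose(embeds, params[ids])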
Example #4
  def _sample_n(self, n, seed):
    batch_shape = self.batch_shape_tensor()
    event_shape = self.event_shape_tensor()
    batch_ndims = array_ops.shape(batch_shape)[0]

    ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
    shape = array_ops.concat([[n], batch_shape, event_shape], 0)

    # Complexity: O(nbk**2)
    x = random_ops.random_normal(shape=shape,
                                 mean=0.,
                                 stddev=1.,
                                 dtype=self.dtype,
                                 seed=seed)

    # Complexity: O(nbk)
    # This parametrization is equivalent to Chi2, i.e.,
    # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
    expanded_df = self.df * array_ops.ones(
        self.scale_operator.batch_shape_tensor(),
        dtype=self.df.dtype.base_dtype)
    g = random_ops.random_gamma(shape=[n],
                                alpha=self._multi_gamma_sequence(
                                    0.5 * expanded_df, self.dimension),
                                beta=0.5,
                                dtype=self.dtype,
                                seed=distribution_util.gen_new_seed(
                                    seed, "wishart"))

    # Complexity: O(nbk**2)
    x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

    # Complexity: O(nbk)
    x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

    # Make batch-op ready.
    # Complexity: O(nbk**2)
    perm = array_ops.concat([math_ops.range(1, ndims), [0]], 0)
    x = array_ops.transpose(x, perm)
    shape = array_ops.concat([batch_shape, [event_shape[0]], [-1]], 0)
    x = array_ops.reshape(x, shape)

    # Complexity: O(nbM) where M is the complexity of the operator solving a
    # vector system. E.g., for LinearOperatorDiag, each matmul is O(k**2), so
    # this complexity is O(nbk**2). For LinearOperatorLowerTriangular,
    # each matmul is O(k^3) so this step has complexity O(nbk^3).
    x = self.scale_operator.matmul(x)

    # Undo make batch-op ready.
    # Complexity: O(nbk**2)
    shape = array_ops.concat([batch_shape, event_shape, [n]], 0)
    x = array_ops.reshape(x, shape)
    perm = array_ops.concat([[ndims - 1], math_ops.range(0, ndims - 1)], 0)
    x = array_ops.transpose(x, perm)

    if not self.cholesky_input_output_matrices:
      # Complexity: O(nbk^3)
      x = math_ops.matmul(x, x, adjoint_b=True)

    return x
Example #5
  def _full_batch_training_op(self, inputs, cluster_idx_list, cluster_centers):
    """Creates an op for training for full batch case.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.

    Returns:
      An op for doing an update of full-batch k-means.
    """
    cluster_sums = []
    cluster_counts = []
    epsilon = constant_op.constant(1e-6, dtype=inputs[0].dtype)
    for inp, cluster_idx in zip(inputs, cluster_idx_list):
      with ops.colocate_with(inp):
        cluster_sums.append(
            math_ops.unsorted_segment_sum(inp, cluster_idx, self._num_clusters))
        cluster_counts.append(
            math_ops.unsorted_segment_sum(
                array_ops.reshape(
                    array_ops.ones(
                        array_ops.reshape(array_ops.shape(inp)[0], [-1])),
                    [-1, 1]), cluster_idx, self._num_clusters))
    with ops.colocate_with(cluster_centers):
      new_clusters_centers = math_ops.add_n(cluster_sums) / (math_ops.cast(
          math_ops.add_n(cluster_counts), cluster_sums[0].dtype) + epsilon)
      if self._clusters_l2_normalized():
        new_clusters_centers = nn_impl.l2_normalize(new_clusters_centers, dim=1)
    return state_ops.assign(cluster_centers, new_clusters_centers)
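The two `unsorted_segment_sum` calls accumulate per-cluster sums of points and of ones (counts); the update divides one by the other. The same arithmetic can be sketched with NumPy's scatter-add (toy data assumed):

import numpy as np

points = np.random.rand(6, 2)                 # assumed mini dataset
cluster_idx = np.array([0, 1, 0, 2, 1, 0])    # cluster assignment per row
num_clusters, epsilon = 3, 1e-6

sums = np.zeros((num_clusters, points.shape[1]))
counts = np.zeros((num_clusters, 1))
np.add.at(sums, cluster_idx, points)          # segment sum of the points
np.add.at(counts, cluster_idx, 1.0)           # segment sum of ones
new_centers = sums / (counts + epsilon)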
def mask_activations_and_labels(activations, labels, sequence_lengths):
  """Remove entries outside `sequence_lengths` and returned flattened results.

  Args:
    activations: Output of the RNN, shape `[batch_size, padded_length, k]`.
    labels: Label values, shape `[batch_size, padded_length]`.
    sequence_lengths: A `Tensor` of shape `[batch_size]` with the unpadded
      length of each sequence. If `None`, then each sequence is unpadded.

  Returns:
    activations_masked: `logit` values with those beyond `sequence_lengths`
      removed for each batch. Batches are then concatenated. Shape
      `[tf.sum(sequence_lengths), k]` if `sequence_lengths` is not `None` and
      shape `[batch_size * padded_length, k]` otherwise.
    labels_masked: Label values after removing unneeded entries. Shape
      `[tf.sum(sequence_lengths)]` if `sequence_lengths` is not `None` and shape
      `[batch_size * padded_length]` otherwise.
  """
  with ops.name_scope('mask_activations_and_labels',
                      values=[activations, labels, sequence_lengths]):
    labels_shape = array_ops.shape(labels)
    batch_size = labels_shape[0]
    padded_length = labels_shape[1]
    if sequence_lengths is None:
      flattened_dimension = padded_length * batch_size
      activations_masked = array_ops.reshape(activations,
                                             [flattened_dimension, -1])
      labels_masked = array_ops.reshape(labels, [flattened_dimension])
    else:
      mask = array_ops.sequence_mask(sequence_lengths, padded_length)
      activations_masked = array_ops.boolean_mask(activations, mask)
      labels_masked = array_ops.boolean_mask(labels, mask)
    return activations_masked, labels_masked
Example #7
def _SumGrad(op, grad):
  """Gradient for Sum."""
  # Fast path for when reducing to a scalar and ndims is known: adds only
  # Reshape and Tile ops (and possibly a Shape).
  input_0_shape = op.inputs[0]._shape_tuple()  # pylint: disable=protected-access
  if input_0_shape is not None:
    axes = tensor_util.constant_value(op.inputs[1])
    if axes is not None:
      rank = len(input_0_shape)
      if np.array_equal(axes, np.arange(rank)):  # Reduce all dims.
        grad = array_ops.reshape(grad, [1] * rank)
        # If shape is not fully defined (but rank is), we use Shape.
        if None not in input_0_shape:
          input_shape = input_0_shape
        else:
          input_shape = array_ops.shape(op.inputs[0])
        return [array_ops.tile(grad, input_shape), None]

  input_shape = array_ops.shape(op.inputs[0])
  # TODO(apassos) remove this once device placement for eager ops makes more
  # sense.
  with ops.colocate_with(input_shape):
    output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
    tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)
  return [array_ops.tile(grad, tile_scaling), None]
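The general path reshapes the gradient to the reduced shape with kept dims and tiles it back up to the input shape. A NumPy sketch of that step for a sum over axis 1 (toy shapes are assumptions):

import numpy as np

x = np.random.rand(2, 3)
grad = np.ones(2)                         # upstream gradient of x.sum(axis=1)

output_shape_kept_dims = (2, 1)           # reduced_shape(input_shape, axes=[1])
tile_scaling = (1, 3)                     # input_shape // output_shape_kept_dims
dx = np.tile(grad.reshape(output_shape_kept_dims), tile_scaling)

assert dx.shape == x.shape and np.all(dx == 1.0)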
Example #8
def _weighted_loss(loss, weight):
  """Returns cumulative weighted loss."""
  unweighted_loss = array_ops.reshape(loss, shape=(-1,))
  weighted_loss = math_ops.mul(unweighted_loss,
                               array_ops.reshape(
                                   weight, shape=(-1,)))
  return weighted_loss
Example #9
 def __call__(self, inputs, state, scope=None):
   """Long short-term memory cell with attention (LSTMA)."""
   with vs.variable_scope(scope or type(self).__name__):
     if self._state_is_tuple:
       state, attns, attn_states = state
     else:
       states = state
       state = array_ops.slice(states, [0, 0], [-1, self._cell.state_size])
       attns = array_ops.slice(
           states, [0, self._cell.state_size], [-1, self._attn_size])
       attn_states = array_ops.slice(
           states, [0, self._cell.state_size + self._attn_size],
           [-1, self._attn_size * self._attn_length])
     attn_states = array_ops.reshape(attn_states,
                                     [-1, self._attn_length, self._attn_size])
     input_size = self._input_size
     if input_size is None:
       input_size = inputs.get_shape().as_list()[1]
     inputs = _linear([inputs, attns], input_size, True)
     lstm_output, new_state = self._cell(inputs, state)
     if self._state_is_tuple:
       new_state_cat = array_ops.concat(1, _unpacked_state(new_state))
     else:
       new_state_cat = new_state
     new_attns, new_attn_states = self._attention(new_state_cat, attn_states)
     with vs.variable_scope("AttnOutputProjection"):
       output = _linear([lstm_output, new_attns], self._attn_size, True)
     new_attn_states = array_ops.concat(1, [new_attn_states,
                                            array_ops.expand_dims(output, 1)])
     new_attn_states = array_ops.reshape(
         new_attn_states, [-1, self._attn_length * self._attn_size])
     new_state = (new_state, new_attns, new_attn_states)
     if not self._state_is_tuple:
       new_state = array_ops.concat(1, list(new_state))
     return output, new_state
Example #10
 def testBiasVec(self):
   with self.assertRaises(ValueError):
     nn_ops.bias_add(
         array_ops.reshape(
             [1, 2], shape=[1, 2]),
         array_ops.reshape(
             [1, 2], shape=[1, 2]))
Example #11
def _strict_conv1d(x, h):
  """Return x * h for rank 1 tensors x and h."""
  with ops.op_scope([x, h], 'strict_conv1d'):
    x = array_ops.reshape(x, (1, -1, 1, 1))
    h = array_ops.reshape(h, (-1, 1, 1, 1))
    result = nn_ops.conv2d(x, h, [1, 1, 1, 1], 'SAME')
    return array_ops.reshape(result, [-1])
Example #12
def _inplace_helper(x, i, v, op):
  """Applies an inplace op on (x, i, v).

  op is one of gen_array_ops.alias_inplace_update,
  gen_array_ops.alias_inplace_add, or gen_array_ops.alias_inplace_sub.

  If i is None, x and v must be the same shape. Computes
    x op v;
  If i is a scalar, x has a rank 1 higher than v's. Computes
    x[i, :] op v;
  Otherwise, x and v must have the same rank. Computes
    x[i, :] op v;

  Args:
    x: A Tensor.
    i: None, a scalar or a vector.
    v: A Tensor.
    op: alias_inplace_update, alias_inplace_add, or alias_inplace_sub.

  Returns:
    Returns x.

  """
  x = ops.convert_to_tensor(x)
  v = ops.convert_to_tensor(v, x.dtype)
  if i is None:
    # Full tensor.
    return array_ops.reshape(
        op(array_ops.reshape(x, [1, -1]), [0], array_ops.reshape(v, [1, -1])),
        array_ops.shape(x))
  i = math_ops.cast(i, dtypes.int32)
  if i.get_shape().ndims == 0:
    # Single 0-dim update.
    return op(x, array_ops.reshape(i, [1]), array_ops.expand_dims(v, 0))
  return op(x, i, v)
Example #13
  def _expand_sample_shape_to_vector(self, x, name):
    """Helper to `sample` which ensures input is 1D."""
    x_static_val = tensor_util.constant_value(x)
    if x_static_val is None:
      prod = math_ops.reduce_prod(x)
    else:
      prod = np.prod(x_static_val, dtype=x.dtype.as_numpy_dtype())

    ndims = x.get_shape().ndims  # != sample_ndims
    if ndims is None:
      # Maybe expand_dims.
      ndims = array_ops.rank(x)
      expanded_shape = util.pick_vector(
          math_ops.equal(ndims, 0),
          np.array([1], dtype=np.int32), array_ops.shape(x))
      x = array_ops.reshape(x, expanded_shape)
    elif ndims == 0:
      # Definitely expand_dims.
      if x_static_val is not None:
        x = ops.convert_to_tensor(
            np.array([x_static_val], dtype=x.dtype.as_numpy_dtype()),
            name=name)
      else:
        x = array_ops.reshape(x, [1])
    elif ndims != 1:
      raise ValueError("Input is neither scalar nor vector.")

    return x, prod
Example #14
 def testInputDims(self):
   with self.test_session(use_gpu=True):
     with self.assertRaises(ValueError):
       array_ops.pad(array_ops.reshape(
           [1, 2], shape=[1, 2, 1, 1, 1, 1]),
                     array_ops.reshape(
                         [1, 2], shape=[1, 2]))
Example #15
 def testPaddingsDim4(self):
   with self.test_session(use_gpu=True):
     with self.assertRaises(ValueError):
       array_ops.pad(array_ops.reshape(
           [1, 2], shape=[1, 2]),
                     array_ops.reshape(
                         [1, 2, 3, 4, 5, 6], shape=[3, 2]))
  def testListValuedElementwiseOp(self, inputs, op=math_ops.add_n,
                                  **extra_args):
    use_kwargs = extra_args.pop('use_kwargs', False)
    inputs = [
        ragged_tensor.convert_to_tensor_or_ragged_tensor(x) for x in inputs
    ]
    if use_kwargs:
      result = op(inputs=inputs, **extra_args)
    else:
      result = op(inputs, **extra_args)

    # Run the wrapped op on the dense values, for comparison.
    dense_inputs = [
        x.flat_values if isinstance(x, ragged_tensor.RaggedTensor) else x
        for x in inputs
    ]
    expected_flat_values = array_ops.reshape(
        op(dense_inputs, **extra_args), [-1])

    # Check that the result has the expected shape.
    self.assertSameShape(inputs[0], result)

    # Check that the result has the expected (flattened) values.
    if isinstance(result, ragged_tensor.RaggedTensor):
      result_flat_values = array_ops.reshape(result.flat_values, [-1])
    else:
      result_flat_values = array_ops.reshape(result, [-1])
    self.assertAllEqual(expected_flat_values, result_flat_values)
Example #17
def reduce_to_final(images, num_filters_out, nhidden=None, scope=None):
  """Reduce an image to a final state by running two LSTMs.

  Args:
    images: (num_images, height, width, depth) tensor
    num_filters_out: output layer depth
    nhidden: hidden layer depth (defaults to num_filters_out)
    scope: optional scope name

  Returns:
    A (num_images, num_filters_out) batch.
  """
  with variable_scope.variable_scope(scope, "ReduceToFinal", [images]):
    nhidden = nhidden or num_filters_out
    batch_size, height, width, depth = _shape(images)
    transposed = array_ops.transpose(images, [1, 0, 2, 3])
    reshaped = array_ops.reshape(transposed,
                                 [height, batch_size * width, depth])
    with variable_scope.variable_scope("reduce1"):
      reduced = lstm1d.sequence_to_final(reshaped, nhidden)
      transposed_hidden = array_ops.reshape(reduced,
                                            [batch_size, width, nhidden])
      hidden = array_ops.transpose(transposed_hidden, [1, 0, 2])
    with variable_scope.variable_scope("reduce2"):
      output = lstm1d.sequence_to_final(hidden, num_filters_out)
    return output
Example #18
def column_to_tensors(tensors_template, colvec):
  """Converts a column vector back to the shape of the given template.

  Args:
    tensors_template: A tensor or list of tensors.
    colvec: A 2d column vector with the same shape as the value of
        tensors_to_column(tensors_template).

  Returns:
    X, where X is tensor or list of tensors with the properties:
     1) tensors_to_column(X) = colvec
     2) X (or its elements) have the same shape as tensors_template (or its
        elements)
  """
  if isinstance(tensors_template, (tuple, list)):
    offset = 0
    tensors = []
    for tensor_template in tensors_template:
      sz = np.prod(tensor_template.shape.as_list(), dtype=np.int32)
      tensor = array_ops.reshape(colvec[offset:(offset + sz)],
                                 tensor_template.shape)
      tensors.append(tensor)
      offset += sz

    tensors = tuple(tensors)
  else:
    tensors = array_ops.reshape(colvec, tensors_template.shape)

  return tensors
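The list branch walks the flat column vector, slicing out as many entries as each template holds and reshaping the slice back to the template's shape. A short NumPy sketch (template shapes are assumptions):

import numpy as np

templates = [np.zeros((2, 3)), np.zeros(4)]     # assumed template shapes
colvec = np.arange(10.0).reshape(-1, 1)         # 2*3 + 4 = 10 entries

offset, tensors = 0, []
for t in templates:
  sz = int(np.prod(t.shape))
  tensors.append(colvec[offset:offset + sz].reshape(t.shape))
  offset += sz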
  def testBinaryElementwiseOp(self, x, y, op=math_ops.add, **extra_args):
    use_kwargs = extra_args.pop('use_kwargs', ())
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x)
    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y)
    if 'x' in use_kwargs and 'y' in use_kwargs:
      result = op(x=x, y=y, **extra_args)
    elif 'y' in use_kwargs:
      result = op(x, y=y, **extra_args)
    else:
      result = op(x, y, **extra_args)

    # Run the wrapped op on the dense values, for comparison.
    dense_x = x.flat_values if isinstance(x, ragged_tensor.RaggedTensor) else x
    dense_y = y.flat_values if isinstance(y, ragged_tensor.RaggedTensor) else y
    expected_flat_values = array_ops.reshape(
        op(dense_x, dense_y, **extra_args), [-1])

    # Check that the result has the expected shape.
    self.assertSameShape(y, result)

    # Check that the result has the expected (flattened) values.
    if isinstance(result, ragged_tensor.RaggedTensor):
      result_flat_values = array_ops.reshape(result.flat_values, [-1])
    else:
      result_flat_values = array_ops.reshape(result, [-1])
    self.assertAllEqual(expected_flat_values, result_flat_values)
Example #20
def _TopKGrad(op, grad, _):
  """Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input of
    TopK, and the second being the gradient w.r.t. the indices (all zero).
  """
  in_shape = array_ops.shape(op.inputs[0])
  ind_shape = array_ops.shape(op.outputs[1])

  ind_lastdim = array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1)
  # Flatten indices to 2D.
  ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack([-1, ind_lastdim]))

  in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1)
  outerdim = array_ops.shape(ind_2d)[0]
  # Compute linear indices (flattened to 1D).
  ind = array_ops.reshape(ind_2d + array_ops.expand_dims(
      math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1), [-1])

  # Substitute grad to appropriate locations and fill the rest with zeros,
  # finally reshaping it to the original input shape.
  return [array_ops.reshape(
      sparse_ops.sparse_to_dense(ind,
                                 array_ops.reshape(
                                     math_ops.reduce_prod(in_shape), [1]),
                                 array_ops.reshape(grad, [-1]),
                                 validate_indices=False),
      in_shape), array_ops.zeros(
          [], dtype=dtypes.int32)]
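The scatter step places each top-k gradient at its linear index in the flattened input. A NumPy sketch of the same index arithmetic for a single 2-D input (values are assumptions):

import numpy as np

in_shape = (2, 4)
indices = np.array([[0, 3], [1, 2]])           # assumed top-k indices, k=2
grad = np.array([[10., 20.], [30., 40.]])      # gradient w.r.t. the top-k values

in_lastdim = in_shape[-1]
ind_2d = indices.reshape(-1, indices.shape[-1])
linear = (ind_2d + np.arange(ind_2d.shape[0])[:, None] * in_lastdim).reshape(-1)

flat = np.zeros(np.prod(in_shape))
flat[linear] = grad.reshape(-1)                # sparse_to_dense equivalent
grad_in = flat.reshape(in_shape)               # [[10, 0, 0, 20], [0, 30, 40, 0]]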
Example #21
def kronecker_product(mat1, mat2):
  """Computes the Kronecker product two matrices."""
  m1, n1 = mat1.get_shape().as_list()
  mat1_rsh = array_ops.reshape(mat1, [m1, 1, n1, 1])
  m2, n2 = mat2.get_shape().as_list()
  mat2_rsh = array_ops.reshape(mat2, [1, m2, 1, n2])
  return array_ops.reshape(mat1_rsh * mat2_rsh, [m1 * m2, n1 * n2])
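The reshape-and-broadcast trick reproduces the standard Kronecker product; a quick NumPy check (toy matrices assumed):

import numpy as np

mat1 = np.arange(6.0).reshape(2, 3)
mat2 = np.arange(4.0).reshape(2, 2)
m1, n1 = mat1.shape
m2, n2 = mat2.shape

prod = mat1.reshape(m1, 1, n1, 1) * mat2.reshape(1, m2, 1, n2)
assert np.allclose(prod.reshape(m1 * m2, n1 * n2), np.kron(mat1, mat2))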
Example #22
  def to_weighted_sum(self,
                      input_tensor,
                      num_outputs=1,
                      weight_collections=None,
                      trainable=True):
    """Returns a Tensor as linear predictions and a list of created Variable."""
    dimension = self.source_column.dimension
    batch_size = array_ops.shape(input_tensor)[0]

    if dimension > 1:
      i1 = array_ops.reshape(array_ops.tile(array_ops.expand_dims(
          math_ops.range(0, batch_size), 1), [1, dimension]), [-1])
      i2 = array_ops.tile(math_ops.range(0, dimension), [batch_size])
      # Flatten the bucket indices and unique them across dimensions
      # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets
      # TODO(chapelle): move that logic to insert_transformed_feature to ensure
      #   unique buckets across dimensions after crossing.
      bucket_indices = array_ops.reshape(input_tensor, [-1]) + self.length * i2
    else:
      # Simpler indices when dimension=1
      i1 = math_ops.range(0, batch_size)
      i2 = array_ops.zeros([batch_size], dtype=dtypes.int32)
      bucket_indices = array_ops.reshape(input_tensor, [-1])

    indices = math_ops.to_int64(array_ops.transpose(array_ops.pack((i1, i2))))
    shape = math_ops.to_int64(array_ops.pack([batch_size, 1]))
    sparse_id_values = ops.SparseTensor(indices, bucket_indices, shape)
    vocab_size = self.length * self.source_column.dimension

    return _create_embedding_lookup(
        sparse_id_values, vocab_size, num_outputs,
        _add_variable_collection(weight_collections), 0., "sum",
        trainable, self.name + "_weights")
Example #23
  def call(self, inputs):
    outputs = nn.convolution(
        input=inputs,
        filter=self.masked_kernel,
        dilation_rate=self.dilation_rate,
        strides=self.strides,
        padding=self.padding.upper(),
        data_format=utils.convert_data_format(self.data_format, self.rank + 2))

    if self.bias is not None:
      if self.data_format == 'channels_first':
        if self.rank == 1:
          # nn.bias_add does not accept a 1D input tensor.
          bias = array_ops.reshape(self.bias, (1, self.filters, 1))
          outputs += bias
        if self.rank == 2:
          outputs = nn.bias_add(outputs, self.bias, data_format='NCHW')
        if self.rank == 3:
          # As of Mar 2017, direct addition is significantly slower than
          # bias_add when computing gradients. To use bias_add, we collapse Z
          # and Y into a single dimension to obtain a 4D input tensor.
          outputs_shape = outputs.shape.as_list()
          outputs_4d = array_ops.reshape(outputs, [
              outputs_shape[0], outputs_shape[1],
              outputs_shape[2] * outputs_shape[3], outputs_shape[4]
          ])
          outputs_4d = nn.bias_add(outputs_4d, self.bias, data_format='NCHW')
          outputs = array_ops.reshape(outputs_4d, outputs_shape)
      else:
        outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

    if self.activation is not None:
      return self.activation(outputs)
    return outputs
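For the rank-3 NCDHW case, collapsing the two middle spatial dimensions does not change which bias lands on which channel. A NumPy sketch of that equivalence (toy shapes assumed):

import numpy as np

outputs = np.random.rand(2, 5, 3, 4, 6)      # assumed NCDHW activations
bias = np.random.rand(5)                     # one bias per channel

n, c, d, h, w = outputs.shape
outputs_4d = outputs.reshape(n, c, d * h, w)
outputs_4d = outputs_4d + bias.reshape(1, c, 1, 1)   # what bias_add does in NCHW
collapsed = outputs_4d.reshape(outputs.shape)

assert np.allclose(collapsed, outputs + bias.reshape(1, c, 1, 1, 1))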
 def testSubsampleThreeByThree(self):
   x = array_ops.reshape(math_ops.to_float(math_ops.range(9)), [1, 3, 3, 1])
   x = resnet_utils.subsample(x, 2)
   expected = array_ops.reshape(
       constant_op.constant([0, 2, 6, 8]), [1, 2, 2, 1])
   with self.test_session():
     self.assertAllClose(x.eval(), expected.eval())
 def testSubsampleFourByFour(self):
   x = array_ops.reshape(math_ops.to_float(math_ops.range(16)), [1, 4, 4, 1])
   x = resnet_utils.subsample(x, 2)
   expected = array_ops.reshape(
       constant_op.constant([0, 2, 8, 10]), [1, 2, 2, 1])
   with self.test_session():
     self.assertAllClose(x.eval(), expected.eval())
 def test_discriminator_patch(self):
   loss = self._d_loss_fn(
       array_ops.reshape(self._discriminator_real_outputs, [2, 2]),
       array_ops.reshape(self._discriminator_gen_outputs, [2, 2]))
   self.assertEqual(self._discriminator_gen_outputs.dtype, loss.dtype)
   with self.test_session():
     self.assertAlmostEqual(self._expected_d_loss, loss.eval(), 5)
 def _apply_sparse(self, grad, var):
   if len(grad.indices.get_shape()) == 1:
     grad_indices = grad.indices
     grad_values = grad.values
   else:
     grad_indices = array_ops.reshape(grad.indices, [-1])
     grad_values = array_ops.reshape(grad.values, [-1, grad.values.get_shape()[-1].value])
   gidxs, metagidxs = array_ops.unique(grad_indices)
   sizegidxs = array_ops.size(gidxs)
   gvals = math_ops.unsorted_segment_sum(grad_values, metagidxs, sizegidxs)
   # m_t = mu * m + (1 - mu) * g_t
   m = self.get_slot(var, "m")
   m_scaled_g_values = gvals * (1 - self._mu_t)
   m_t = state_ops.scatter_update(m, gidxs,
                                  array_ops.gather(m, gidxs) * self._mu_t,
                                  use_locking=self._use_locking)
   m_t = state_ops.scatter_add(m_t, gidxs, m_scaled_g_values,
                               use_locking=self._use_locking)
   m_t_ = array_ops.gather(m_t, gidxs) / (1 - self._mu2_t * self._mu_power)
   # m_bar = mu * m_t + (1 - mu) * g_t
   m_bar = self._mu2_t * m_t_ + m_scaled_g_values / (1 - self._mu_power)
   var_update = state_ops.scatter_sub(var, gidxs,
                                    self._lr_t * m_bar,
                                    use_locking=self._use_locking)
   return control_flow_ops.group(*[var_update, m_t])
 def _fn(x):
   """MADE parameterized via `masked_autoregressive_default_template`."""
   # TODO(b/67594795): Better support of dynamic shape.
   input_depth = x.shape.with_rank_at_least(1)[-1].value
   if input_depth is None:
     raise NotImplementedError(
         "Rightmost dimension must be known prior to graph execution.")
   input_shape = (np.int32(x.shape.as_list()) if x.shape.is_fully_defined()
                  else array_ops.shape(x))
   for i, units in enumerate(hidden_layers):
     x = masked_dense(
         inputs=x,
         units=units,
         num_blocks=input_depth,
         exclusive=True if i == 0 else False,
         activation=activation,
         *args,
         **kwargs)
   x = masked_dense(
       inputs=x,
       units=(1 if shift_only else 2) * input_depth,
       num_blocks=input_depth,
       activation=None,
       *args,
       **kwargs)
   if shift_only:
     x = array_ops.reshape(x, shape=input_shape)
     return x, None
   x = array_ops.reshape(
       x, shape=array_ops.concat([input_shape, [2]], axis=0))
   shift, log_scale = array_ops.unstack(x, num=2, axis=-1)
   which_clip = (math_ops.clip_by_value if log_scale_clip_gradient
                 else _clip_by_value_preserve_grad)
   log_scale = which_clip(log_scale, log_scale_min_clip, log_scale_max_clip)
   return shift, log_scale
Example #29
def _TileGrad(op, grad):
  """Sum reduces grad along the tiled dimensions."""
  input_shape = array_ops.shape(op.inputs[0])
  # We interleave multiples and input_shape to get split_shape,
  # reshape grad to split_shape, and reduce along all even
  # dimensions (the tiled dimensions) to get the result
  # with shape input_shape.  For example
  #   input_shape = [20, 30, 40]
  #   multiples = [2, 3, 4]
  #   split_shape = [2, 20, 3, 30, 4, 40]
  #   axes = [0, 2, 4]
  split_shape = array_ops.reshape(
      array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1])
  axes = math_ops.range(0, array_ops.size(split_shape), 2)
  # Sum reduces grad along the first dimension for IndexedSlices
  if isinstance(grad, ops.IndexedSlices):
    grad = math_ops.unsorted_segment_sum(
        grad.values,
        math_ops.mod(grad.indices, input_shape[0]),
        input_shape[0])
    split_shape = array_ops.concat([[1], split_shape[1:]], axis=0)
  input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes)
  # Fix shape inference
  if not context.executing_eagerly():
    input_grad.set_shape(op.inputs[0].get_shape())
  return [input_grad, None]
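The interleaved split_shape can be checked numerically: reshaping the tiled gradient to [multiple_0, dim_0, multiple_1, dim_1, ...] and summing the even axes collects every tiled copy of each input element. A NumPy sketch (toy shapes assumed):

import numpy as np

x_shape, multiples = (2, 3), (2, 2)
grad = np.random.rand(*(m * d for m, d in zip(multiples, x_shape)))  # (4, 6)

split_shape = [v for pair in zip(multiples, x_shape) for v in pair]  # [2, 2, 2, 3]
axes = tuple(range(0, len(split_shape), 2))                          # (0, 2)
input_grad = grad.reshape(split_shape).sum(axis=axes)

# Brute force: sum the gradient over every tile copy.
expected = sum(grad[i * 2:(i + 1) * 2, j * 3:(j + 1) * 3]
               for i in range(2) for j in range(2))
assert np.allclose(input_grad, expected)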
Example #30
def embedding_lookup(params, ids, name='embedding_lookup'):
  """Provides a N dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup
  then, they are unflattend to match the original ids shape plus an extra
  leading dimension of the size of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1.
      Must contain indexes into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indices in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
  with ops.name_scope(name, 'embedding_lookup', [params, ids]):
    params = ops.convert_to_tensor(params)
    ids = ops.convert_to_tensor(ids)
    shape = array_ops_.shape(ids)
    ids_flat = array_ops_.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    embeds_flat = nn.embedding_lookup(params, ids_flat, name)
    embed_shape = array_ops_.concat_v2([shape, [-1]], 0)
    embeds = array_ops_.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
    return embeds
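The flatten/lookup/unflatten round trip is easy to see with NumPy (toy table and ids assumed): flatten the ids, index the table once, then restore the ids' shape with the embedding dimension appended.

import numpy as np

params = np.random.rand(10, 4)                # assumed table [vocab, dim]
ids = np.random.randint(10, size=(2, 3))      # assumed 2-D ids

ids_flat = ids.reshape(-1)
embeds_flat = params[ids_flat]
embeds = embeds_flat.reshape(ids.shape + (params.shape[1],))

assert embeds.shape == (2, 3, 4)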
Example #31
 def loss_fn():
     y = array_ops.reshape(layer(x),
                           []) - constant_op.constant(1.)
     return y * y
Example #32
def _parse_example_raw(serialized,
                       names=None,
                       sparse_keys=None,
                       sparse_types=None,
                       dense_keys=None,
                       dense_types=None,
                       dense_defaults=None,
                       dense_shapes=None,
                       name=None):
    """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
      Required for any input tensors identified by `dense_keys` whose shapes are
      anything other than `[]` or `[1]`.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.

  Raises:
    ValueError: If sparse and dense key sets intersect, or input lengths do not
      match up.
  """
    with ops.op_scope([serialized, names], name, "ParseExample"):
        names = [] if names is None else names
        dense_defaults = {} if dense_defaults is None else dense_defaults
        sparse_keys = [] if sparse_keys is None else sparse_keys
        sparse_types = [] if sparse_types is None else sparse_types
        dense_keys = [] if dense_keys is None else dense_keys
        dense_types = [] if dense_types is None else dense_types
        dense_shapes = ([[]] * len(dense_keys)
                        if dense_shapes is None else dense_shapes)

        num_dense = len(dense_keys)
        num_sparse = len(sparse_keys)

        if len(dense_shapes) != num_dense:
            raise ValueError(
                "len(dense_shapes) != len(dense_keys): %d vs. %d" %
                (len(dense_shapes), num_dense))
        if len(dense_types) != num_dense:
            raise ValueError("len(dense_types) != len(num_dense): %d vs. %d" %
                             (len(dense_types), num_dense))
        if len(sparse_types) != num_sparse:
            raise ValueError(
                "len(sparse_types) != len(sparse_keys): %d vs. %d" %
                (len(sparse_types), num_sparse))
        if num_dense + num_sparse == 0:
            raise ValueError(
                "Must provide at least one sparse key or dense key")
        if not set(dense_keys).isdisjoint(set(sparse_keys)):
            raise ValueError(
                "Dense and sparse keys must not intersect; intersection: %s" %
                set(dense_keys).intersection(set(sparse_keys)))

        dense_defaults_vec = []
        for i, key in enumerate(dense_keys):
            default_value = dense_defaults.get(key)
            if default_value is None:
                default_value = constant_op.constant([], dtype=dense_types[i])
            elif not isinstance(default_value, ops.Tensor):
                key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
                default_value = ops.convert_to_tensor(default_value,
                                                      dtype=dense_types[i],
                                                      name=key_name)
                default_value = array_ops.reshape(default_value,
                                                  dense_shapes[i])

            dense_defaults_vec.append(default_value)

        dense_shapes = [
            tensor_shape.as_shape(shape).as_proto() for shape in dense_shapes
        ]

        # pylint: disable=protected-access
        outputs = gen_parsing_ops._parse_example(
            serialized=serialized,
            names=names,
            dense_defaults=dense_defaults_vec,
            sparse_keys=sparse_keys,
            sparse_types=sparse_types,
            dense_keys=dense_keys,
            dense_shapes=dense_shapes,
            name=name)
        # pylint: enable=protected-access

        (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs

        sparse_tensors = [
            ops.SparseTensor(ix, val, shape)
            for (ix, val,
                 shape) in zip(sparse_indices, sparse_values, sparse_shapes)
        ]

        return dict(
            zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
def _ExtractImagePatchesGrad(op, grad):
    """Gradient function for ExtractImagePatches."""
    batch_size, rows_in, cols_in, channels = [
        dim.value for dim in op.inputs[0].get_shape()
    ]
    input_bhwc = array_ops.shape(op.inputs[0])
    batch_size = input_bhwc[0]
    channels = input_bhwc[3]

    _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].get_shape()]
    _, ksize_r, ksize_c, _ = op.get_attr("ksizes")
    _, stride_r, stride_h, _ = op.get_attr("strides")
    _, rate_r, rate_c, _ = op.get_attr("rates")
    padding = op.get_attr("padding")

    ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1)
    ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1)

    if padding == b"SAME":
        rows_out = int(ceil(rows_in / stride_r))
        cols_out = int(ceil(cols_in / stride_h))
        pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2
        pad_cols = ((cols_out - 1) * stride_h + ksize_c_eff - cols_in) // 2

    elif padding == b"VALID":
        rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r))
        cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_h))
        pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in
        pad_cols = (cols_out - 1) * stride_h + ksize_c_eff - cols_in

    pad_rows, pad_cols = max(0, pad_rows), max(0, pad_cols)

    grad_expanded = array_ops.transpose(
        array_ops.reshape(
            grad,
            (batch_size, rows_out, cols_out, ksize_r, ksize_c, channels)),
        (1, 2, 3, 4, 0, 5))
    grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

    row_steps = range(0, rows_out * stride_r, stride_r)
    col_steps = range(0, cols_out * stride_h, stride_h)

    idx = []
    for i in range(rows_out):
        for j in range(cols_out):
            r_low, c_low = row_steps[i] - pad_rows, col_steps[j] - pad_cols
            r_high, c_high = r_low + ksize_r_eff, c_low + ksize_c_eff

            idx.extend([
                (r * (cols_in) + c, i * (cols_out * ksize_r * ksize_c) + j *
                 (ksize_r * ksize_c) + ri * (ksize_c) + ci)
                for (ri, r) in enumerate(range(r_low, r_high, rate_r))
                for (ci, c) in enumerate(range(c_low, c_high, rate_c))
                if 0 <= r and r < rows_in and 0 <= c and c < cols_in
            ])

    sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c)

    sp_mat = sparse_tensor.SparseTensor(
        array_ops.constant(idx, dtype=ops.dtypes.int64),
        array_ops.ones((len(idx), ), dtype=ops.dtypes.float32), sp_shape)

    jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

    grad_out = array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels))
    grad_out = array_ops.transpose(grad_out, (2, 0, 1, 3))

    return [grad_out]
def _ReshapeToInput(op, grad):
    """Reshapes the gradient to the shape of the original input."""
    return array_ops.reshape(grad, array_ops.shape(op.inputs[0]))
def _ReshapeGrad(op, grad):
    return [array_ops.reshape(grad, array_ops.shape(op.inputs[0])), None]
def _GatherV2Grad(op, grad):
    """Gradient for GatherV2 op."""
    # params can be large, so colocate the shape calculation with it.
    #
    # params can be very large for sparse model, array_ops.shape raises
    # exception on the Windows platform when any dimension is larger than
    # int32. params_shape is not used in optimizer apply_sparse gradients,
    # so it's fine to convert it back to int32 regardless of truncation.
    params = op.inputs[0]
    with ops.colocate_with(params):
        params_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
        params_shape = math_ops.to_int32(params_shape)

    indices = op.inputs[1]
    indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
    axis = op.inputs[2]
    axis_static = tensor_util.constant_value(axis)

    # For axis 0 gathers, build an appropriately shaped IndexedSlices.
    if axis_static == 0:
        if context.executing_eagerly():
            params_tail_shape = params_shape.cpu()[1:]
        else:
            params_tail_shape = params_shape[1:]
        values_shape = array_ops.concat([indices_size, params_tail_shape], 0)
        values = array_ops.reshape(grad, values_shape)
        indices = array_ops.reshape(indices, indices_size)
        return [ops.IndexedSlices(values, indices, params_shape), None, None]

    outer_shape = params_shape[:axis]
    outer_dims = array_ops.size(outer_shape)
    inner_shape = params_shape[axis:][1:]
    inner_dims = array_ops.size(inner_shape)

    outer_axes_indices = math_ops.range(outer_dims)
    inner_axes_indices = math_ops.range(outer_dims + 1,
                                        outer_dims + 1 + inner_dims)

    values_shape = array_ops.concat([outer_shape, indices_size, inner_shape],
                                    0)
    values = array_ops.reshape(grad, values_shape)
    indices = array_ops.reshape(indices, indices_size)

    # We need to sum up every slice `values[..., i, ....]` corresponding to
    # `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
    # support an axis parameter, we transpose the gather dimension to the front,
    # then use `unsorted_segment_sum` to build a
    # [gather_axis, outer_axes, inner_axes] tensor with all the gradients
    # affecting each index in `gather_axis` summed up.
    transpose_dims = array_ops.concat(
        [[outer_dims], outer_axes_indices, inner_axes_indices], 0)
    values_transpose = array_ops.transpose(values, transpose_dims)
    num_segments = params_shape[axis]

    params_grad = math_ops.unsorted_segment_sum(values_transpose, indices,
                                                num_segments)

    # Inverts the above transpose by moving dimension 0 back to its original
    # position.
    invert_transpose_dims = array_ops.concat(
        [outer_axes_indices + 1, [0], inner_axes_indices], 0)
    params_grad = array_ops.transpose(params_grad, invert_transpose_dims)
    return [params_grad, None, None]
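For the axis-0 case, the IndexedSlices above is a lazy form of scatter-adding the incoming gradient rows back into a zero tensor of params' shape. A NumPy sketch of that dense equivalent (toy shapes assumed):

import numpy as np

params = np.random.rand(5, 3)
indices = np.array([1, 3, 1])            # repeated index: gradients accumulate
grad = np.random.rand(3, 3)              # gradient of params[indices]

params_grad = np.zeros_like(params)
np.add.at(params_grad, indices, grad)    # unsorted_segment_sum equivalent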
Example #37
def triplet_semihard_loss(labels, embeddings, metric, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.
    The loss encourages the positive distances (between a pair of embeddings
    with the same labels) to be smaller than the minimum negative distance
    among those negatives that are at least greater than the positive distance
    plus the margin constant (the semi-hard negatives) in the mini-batch. If no
    such negative exists, the largest negative distance is used instead.
    See: https://arxiv.org/abs/1503.03832.
    Args:
        labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
        multiclass integer labels.
        embeddings: 2-D float `Tensor` of embedding vectors.
        metric: Callable that maps `embeddings` to a [batch_size, batch_size]
            pairwise distance matrix.
        margin: Float, margin term in the loss definition.
    Returns:
        triplet_loss: tf.float32 scalar.
    """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = metric(embeddings)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    # Compute the mask.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = math_ops.reduce_sum(mask_positives)

    triplet_loss = math_ops.truediv(math_ops.reduce_sum(
        math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)),
                                    num_positives,
                                    name='triplet_semihard_loss')

    return triplet_loss, 0
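This variant takes the pairwise distance `metric` as an argument. One common choice (an assumption here, not necessarily the metric used with this code) is the pairwise squared Euclidean distance matrix, sketched in NumPy:

import numpy as np

def pairwise_squared_distances(embeddings):
  """[batch, dim] embeddings -> [batch, batch] squared Euclidean distances."""
  sq_norms = np.sum(embeddings ** 2, axis=1, keepdims=True)
  d = sq_norms + sq_norms.T - 2.0 * embeddings @ embeddings.T
  return np.maximum(d, 0.0)   # clamp tiny negatives caused by round-off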
Example #38
 def loss_fn(x):
     y = array_ops.reshape(gen_math_ops.mat_mul(x, kernel),
                           []) - constant_op.constant(1.)
     return y * y
Example #39
  def _argsort(a, axis, stable):
    if axis is None:
      a = array_ops.reshape(a, [-1])
      axis = 0

    return sort_ops.argsort(a, axis, stable=stable)
Example #40
    def __new__(cls,
                mode,
                predictions=None,
                loss=None,
                train_op=None,
                eval_metric_ops=None,
                export_outputs=None,
                training_chief_hooks=None,
                training_hooks=None,
                scaffold=None):
        """Creates a validated `EstimatorSpec` instance.

    Depending on the value of `mode`, different arguments are required. Namely
    * For `mode == ModeKeys.TRAIN`: required fields are `loss` and `train_op`.
    * For `mode == ModeKeys.EVAL`: required field is `loss`.
    * For `mode == ModeKeys.PREDICT`: required field is `predictions`.

    model_fn can populate all arguments independent of mode. In this case, some
    arguments will be ignored by `Estimator`. E.g. `train_op` will be ignored
    in eval and infer modes. Example:

    ```python
    def my_model_fn(mode, features, labels):
      predictions = ...
      loss = ...
      train_op = ...
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op)
    ```

    Alternatively, model_fn can just populate the arguments appropriate to the
    given mode. Example:

    ```python
    def my_model_fn(mode, features, labels):
      if (mode == tf.estimator.ModeKeys.TRAIN or
          mode == tf.estimator.ModeKeys.EVAL):
        loss = ...
      else:
        loss = None
      if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = ...
      else:
        train_op = None
      if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = ...
      else:
        predictions = None

      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op)
    ```

    Args:
      mode: A `ModeKeys`. Specifies if this is training, evaluation or
        prediction.
      predictions: Predictions `Tensor` or dict of `Tensor`.
      loss: Training loss `Tensor`. Must be either scalar, or with shape `[1]`.
      train_op: Op for the training step.
      eval_metric_ops: Dict of metric results keyed by name. The values of the
        dict are the results of calling a metric function, namely a
        `(metric_tensor, update_op)` tuple.
      export_outputs: Describes the output signatures to be exported to
        `SavedModel` and used during serving.
        A dict `{name: output}` where:
        * name: An arbitrary name for this output.
        * output: an `ExportOutput` object such as `ClassificationOutput`,
            `RegressionOutput`, or `PredictOutput`.
        Single-headed models only need to specify one entry in this dictionary.
        Multi-headed models should specify one entry for each head, one of
        which must be named using
        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.
      training_chief_hooks: A list of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
      training_hooks: A list of `tf.train.SessionRunHook` objects to run on
        all workers during training.
      scaffold: A `tf.train.Scaffold` object that can be used to set
        initialization, saver, and more to be used in training.

    Returns:
      A validated `EstimatorSpec` object.

    Raises:
      ValueError: If validation fails.
      TypeError: If any of the arguments is not the expected type.
    """
        # Validate train_op.
        if train_op is None:
            if mode == ModeKeys.TRAIN:
                raise ValueError('Missing train_op.')
        else:
            _check_is_tensor_or_operation(train_op, 'train_op')

        # Validate loss.
        if loss is None:
            if mode in (ModeKeys.TRAIN, ModeKeys.EVAL):
                raise ValueError('Missing loss.')
        else:
            loss = _check_is_tensor(loss, 'loss')
            loss_shape = loss.get_shape()
            if loss_shape.num_elements() not in (None, 1):
                raise ValueError('Loss must be scalar, given: {}'.format(loss))
            if not loss_shape.is_compatible_with(tensor_shape.scalar()):
                loss = array_ops.reshape(loss, [])

        # Validate predictions.
        if predictions is None:
            if mode == ModeKeys.PREDICT:
                raise ValueError('Missing predictions.')
            predictions = {}
        else:
            if isinstance(predictions, dict):
                predictions = {
                    k: _check_is_tensor(v, 'predictions[{}]'.format(k))
                    for k, v in six.iteritems(predictions)
                }
            else:
                predictions = _check_is_tensor(predictions, 'predictions')

        # Validate eval_metric_ops.
        if eval_metric_ops is None:
            eval_metric_ops = {}
        else:
            if not isinstance(eval_metric_ops, dict):
                raise TypeError(
                    'eval_metric_ops must be a dict, given: {}'.format(
                        eval_metric_ops))
            for key, metric_value in six.iteritems(eval_metric_ops):
                if (not isinstance(metric_value, tuple)
                        or len(metric_value) != 2):
                    raise TypeError(
                        'Values of eval_metric_ops must be (metric_tensor, update_op) '
                        'tuples, given: {} for key: {}'.format(
                            metric_value, key))
                _check_is_tensor_or_operation(
                    metric_value[0], 'eval_metric_ops[{}]'.format(key))
                _check_is_tensor_or_operation(
                    metric_value[1], 'eval_metric_ops[{}]'.format(key))

        # Validate export_outputs.
        if export_outputs is not None:
            if not isinstance(export_outputs, dict):
                raise TypeError(
                    'export_outputs must be dict, given: {}'.format(
                        export_outputs))
            for v in six.itervalues(export_outputs):
                if not isinstance(v, ExportOutput):
                    raise TypeError(
                        'Values in export_outputs must be ExportOutput objects. '
                        'Given: {}'.format(export_outputs))
            # Note export_outputs is allowed to be empty.
            if len(export_outputs) == 1:
                (key, value), = export_outputs.items()
                if key != signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    export_outputs[signature_constants.
                                   DEFAULT_SERVING_SIGNATURE_DEF_KEY] = value
            if len(export_outputs) > 1:
                if (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                        not in export_outputs):
                    raise ValueError(
                        'Multiple export_outputs were provided, but none of them is '
                        'specified as the default.  Do this by naming one of them with '
                        'signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.'
                    )

        # Validate that all tensors and ops are from the default graph.
        default_graph = ops.get_default_graph()
        for value in _prediction_values(predictions):
            if value.graph is not default_graph:
                raise ValueError(
                    'prediction values must be from the default graph.')
        if loss is not None and loss.graph is not default_graph:
            raise ValueError('loss must be from the default graph.')
        if train_op is not None and train_op.graph is not default_graph:
            raise ValueError('train_op must be from the default graph.')
        for value in _eval_metric_ops_values(eval_metric_ops):
            if value.graph is not default_graph:
                raise ValueError(
                    'eval_metric_ops values must be from the default graph.')

        # Validate hooks.
        if training_chief_hooks is None:
            training_chief_hooks = []
        if training_hooks is None:
            training_hooks = []
        for hook in training_hooks + training_chief_hooks:
            if not isinstance(hook, session_run_hook.SessionRunHook):
                raise TypeError(
                    'All hooks must be SessionRunHook instances, given: {}'.
                    format(hook))

        scaffold = scaffold or monitored_session.Scaffold()
        # Validate scaffold.
        if not isinstance(scaffold, monitored_session.Scaffold):
            raise TypeError(
                'scaffold must be tf.train.Scaffold. Given: {}'.format(
                    scaffold))

        return super(EstimatorSpec,
                     cls).__new__(cls,
                                  predictions=predictions,
                                  loss=loss,
                                  train_op=train_op,
                                  eval_metric_ops=eval_metric_ops,
                                  export_outputs=export_outputs,
                                  training_chief_hooks=training_chief_hooks,
                                  training_hooks=training_hooks,
                                  scaffold=scaffold)
 def f(x):
   return array_ops.reshape(
       x, [math_ops.cast(a, dtypes.int32),
           math_ops.cast(b, dtypes.int32)])
Example #42
 def f(a, b):
   return array_ops.reshape(a, [-1, 1]) * array_ops.reshape(b, [-1])
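Reshaping one operand to a column and the other to a row turns the broadcasted multiply into an outer product; a quick NumPy check (toy vectors assumed):

import numpy as np

a = np.array([1., 2., 3.])
b = np.array([10., 20.])
outer = a.reshape(-1, 1) * b.reshape(-1)   # shape (3, 2)
assert np.allclose(outer, np.outer(a, b))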
Example #43
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      sample_weight=None):
    """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positives: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positives: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value or a python list or tuple of float thresholds in
      `[0, 1]`.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).

  Returns:
    Update op.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
    if variables_to_update is None:
        return
    y_true = ops.convert_to_tensor(y_true)
    y_pred = ops.convert_to_tensor(y_pred)
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    if not any(key
               for key in variables_to_update if key in list(ConfusionMatrix)):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: "{}". '
            'Received: "{}"'.format(list(ConfusionMatrix),
                                    variables_to_update.keys()))

    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            'Invalid keys: {}. Valid variable key options are: "{}"'.format(
                invalid_keys, list(ConfusionMatrix)))

    with ops.control_dependencies([
            check_ops.assert_greater_equal(y_pred,
                                           math_ops.cast(0.0,
                                                         dtype=y_pred.dtype),
                                           message='predictions must be >= 0'),
            check_ops.assert_less_equal(y_pred,
                                        math_ops.cast(1.0, dtype=y_pred.dtype),
                                        message='predictions must be <= 1')
    ]):
        y_pred, y_true, sample_weight = squeeze_or_expand_dimensions(
            math_ops.cast(y_pred, dtype=dtypes.float32),
            math_ops.cast(y_true, dtype=dtypes.bool), sample_weight)

    thresholds = to_list(thresholds)
    num_thresholds = len(thresholds)
    num_predictions = array_ops.size(y_pred)

    # Reshape predictions and labels.
    predictions_2d = array_ops.reshape(y_pred, [1, -1])
    labels_2d = array_ops.reshape(math_ops.cast(y_true, dtype=dtypes.bool),
                                  [1, -1])

    # Tile the thresholds for every prediction.
    thresh_tiled = array_ops.tile(
        array_ops.expand_dims(array_ops.constant(thresholds), 1),
        array_ops.stack([1, num_predictions]))

    # Tile the predictions for every threshold.
    preds_tiled = array_ops.tile(predictions_2d, [num_thresholds, 1])

    # Compare predictions and threshold.
    pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])

    if sample_weight is not None:
        weights = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
        weights_tiled = array_ops.tile(array_ops.reshape(weights, [1, -1]),
                                       [num_thresholds, 1])
    else:
        weights_tiled = None

    update_ops = []

    def weighted_assign_add(label, pred, weights, var):
        label_and_pred = math_ops.cast(math_ops.logical_and(label, pred),
                                       dtype=dtypes.float32)
        if weights is not None:
            label_and_pred *= weights
        return state_ops.assign_add(var,
                                    math_ops.reduce_sum(label_and_pred, 1))

    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    if update_fn or update_tn:
        pred_is_neg = math_ops.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos,
                                                      pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = math_ops.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg,
                                                      pred_is_pos)
        if update_tn:
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg,
                                                         pred_is_neg)

    for matrix_cond, (label, pred) in loop_vars.items():
        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))
    return control_flow_ops.group(update_ops)
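An illustrative NumPy sketch (not the Keras API, values made up) of the tile-and-compare scheme above: predictions are tiled once per threshold, compared against the tiled thresholds, and each confusion-matrix entry is the row-wise sum of a logical-and (optionally weighted):

import numpy as np

# Illustrative re-implementation of the tiled threshold comparison above.
y_true = np.array([1, 0, 1, 1, 0], dtype=bool)
y_pred = np.array([0.9, 0.8, 0.3, 0.6, 0.1], dtype=np.float32)
thresholds = [0.3, 0.5, 0.7]

preds_tiled = np.tile(y_pred.reshape(1, -1), (len(thresholds), 1))
thresh_tiled = np.tile(np.array(thresholds).reshape(-1, 1), (1, y_pred.size))
label_is_pos = np.tile(y_true.reshape(1, -1), (len(thresholds), 1))

pred_is_pos = preds_tiled > thresh_tiled
tp = np.sum(label_is_pos & pred_is_pos, axis=1)    # per-threshold true positives
fp = np.sum(~label_is_pos & pred_is_pos, axis=1)   # per-threshold false positives
fn = np.sum(label_is_pos & ~pred_is_pos, axis=1)   # per-threshold false negatives
tn = np.sum(~label_is_pos & ~pred_is_pos, axis=1)  # per-threshold true negatives
print(tp, fp, fn, tn)  # each is a vector with one entry per threshold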
 def f(x, val1, val2):
   a.assign(math_ops.cast(val1, dtypes.float32))
   b.assign(math_ops.cast(val2, dtypes.float32))
   return array_ops.reshape(
       x, [math_ops.cast(a, dtypes.int32),
           math_ops.cast(b, dtypes.int32)])
Example #45
0
def _reshape_for_efficiency(a,
                            b,
                            transpose_a=False,
                            transpose_b=False,
                            adjoint_a=False,
                            adjoint_b=False):
    """Maybe reshape a, b, and return an inverse map.  For matmul/solve."""
    def identity(x):
        return x

    # At this point, we have not taken transpose/adjoint of a/b.
    still_need_to_transpose = True

    if a.shape.ndims is None or b.shape.ndims is None:
        return a, b, identity, still_need_to_transpose

    # This could be handled in the future, but seems less common.
    if a.shape.ndims >= b.shape.ndims:
        return a, b, identity, still_need_to_transpose

    # From now on, we might modify b, but will not modify a.

    # Suppose:
    #   a.shape =     C + [m, n]
    #   b.shape = S + C + [n, r]
    b_extra_ndims = b.shape.ndims - a.shape.ndims

    # b_extra_sh = S, b_main_sh = C + [n, r]
    b_extra_sh = array_ops.shape(b)[:b_extra_ndims]
    b_main_sh = array_ops.shape(b)[b_extra_ndims:]

    # No reason to flip unless the extra dims of b are big enough.  Why?
    # Assume adjoint/transpose = False.  Then...
    # By not flipping, we have to replicate a to shape
    #   b_extra_sh + a.shape,
    # which could use extra memory.  But in all cases, the final output has shape
    #   b_extra_sh + a.shape[:-1] + [b.shape[-1]]
    # So we only end up creating a larger object if the end dim of b is smaller
    # than the end dim of a.  This often happens, e.g. if b was a vector that was
    # expanded to a matrix (by appending a singleton).

    # Since adjoint/transpose may not be False, we must make adjustments here.
    # The dim of b that holds the multiple equations.
    a_domain_sz_ = a.shape[-2 if adjoint_a or transpose_a else -1]
    b_eq_sz_ = b.shape[-2 if adjoint_b or transpose_b else -1]
    b_extra_sz_ = (np.prod(b.shape[:b_extra_ndims].as_list())
                   if b.shape[:b_extra_ndims].is_fully_defined() else None)
    if (a_domain_sz_ is not None and b_eq_sz_ is not None
            and b_extra_sz_ is not None):
        if b_extra_sz_ < 2 or a_domain_sz_ <= b_eq_sz_:
            return a, b, identity, still_need_to_transpose

    # At this point, we're flipping for sure!
    # Any transposes/adjoints will happen here explicitly, rather than in calling
    # code.  Why?  To avoid having to write separate complex code for each case.
    if adjoint_a:
        a = linalg.adjoint(a)
    elif transpose_a:
        a = linalg.transpose(a)
    if adjoint_b:
        b = linalg.adjoint(b)
    elif transpose_b:
        b = linalg.transpose(b)
    still_need_to_transpose = False

    # Recompute shapes, since the transpose/adjoint may have changed them.
    b_extra_sh = array_ops.shape(b)[:b_extra_ndims]
    b_main_sh = array_ops.shape(b)[b_extra_ndims:]

    # Permutation to put the extra dims at the end.
    perm = (np.concatenate(
        (np.arange(b_extra_ndims, b.shape.ndims), np.arange(0, b_extra_ndims)),
        0))
    b_extra_on_end = array_ops.transpose(b, perm=perm)

    # Now squash this end into one long dim.
    b_squashed_end = array_ops.reshape(
        b_extra_on_end, array_ops.concat((b_main_sh[:-1], [-1]), 0))

    def reshape_inv(y):
        # Expand the extra dims hanging off the end, "b_extra_sh".
        # Note we use y_sh[:-1] + [b_main_sh[-1]] rather than b_main_sh, because y
        # could have different batch dims than a and b, because of broadcasting.
        y_extra_shape = array_ops.concat(
            (array_ops.shape(y)[:-1], [b_main_sh[-1]], b_extra_sh), 0)
        y_extra_on_end = array_ops.reshape(y, y_extra_shape)
        inverse_perm = np.argsort(perm)
        return array_ops.transpose(y_extra_on_end, perm=inverse_perm)

    return a, b_squashed_end, reshape_inv, still_need_to_transpose
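A NumPy shape walk-through of the flip performed above (illustrative assumptions: C = [2], m = n = 3, r = 1, S = [5]): the extra leading dims of b are rotated to the end and folded into the last dim, so the matmul against a broadcasts over a's batch shape only:

import numpy as np

# Shapes mirror the comments above: a.shape = C + [m, n], b.shape = S + C + [n, r].
a = np.zeros((2, 3, 3))
b = np.zeros((5, 2, 3, 1))
b_extra_ndims = b.ndim - a.ndim                       # 1, i.e. len(S)

# Move the extra dims of b to the end, then squash them into the last dim.
perm = np.concatenate((np.arange(b_extra_ndims, b.ndim),
                       np.arange(0, b_extra_ndims)))  # [1, 2, 3, 0]
b_extra_on_end = np.transpose(b, perm)                # shape (2, 3, 1, 5)
b_squashed_end = b_extra_on_end.reshape(2, 3, -1)     # shape (2, 3, 5)

# a @ b_squashed_end broadcasts over C only; the result has shape
# C + [m, r * prod(S)] = (2, 3, 5), which reshape_inv unfolds back to
# S + C + [m, r] via the inverse transpose.
print((a @ b_squashed_end).shape)                     # (2, 3, 5)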
 def f(l):
   return array_ops.reshape(l, s)
Example #47
0
def iris_input_fn():
  iris = base.load_iris()
  features = array_ops.reshape(
      constant_op.constant(iris.data), [-1, _IRIS_INPUT_DIM])
  labels = array_ops.reshape(constant_op.constant(iris.target), [-1])
  return features, labels
Example #48
0
def attention_single_output_decoder(initial_state,
                                    attention_states,
                                    output_size=None,
                                    num_heads=1,
                                    dtype=dtypes.float32,
                                    scope=None,
                                    sequence_length=array_ops.ones([16]),
                                    initial_state_attention=True,
                                    use_attention=False):

  if num_heads < 1:
    raise ValueError("With less than 1 heads, use a non-attention decoder.")
  if not attention_states.get_shape()[1:3].is_fully_defined():
    raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                     % attention_states.get_shape())

  with variable_scope.variable_scope(scope or "decoder_single_output"):
#    print (initial_state.eval().shape)
    batch_size = array_ops.shape(initial_state)[0]  # Needed for reshaping.
#    print (attention_states.get_shape())
    attn_length = attention_states.get_shape()[1].value
    attn_size = attention_states.get_shape()[2].value

    # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
    hidden = array_ops.reshape(
        attention_states, [-1, attn_length, 1, attn_size])
    hidden_features = []
    v = []
    attention_vec_size = attn_size  # Size of query vectors for attention.
    for a in xrange(num_heads):
      k = variable_scope.get_variable("AttnW_%d" % a,
                                      [1, 1, attn_size, attention_vec_size])
      hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
      v.append(variable_scope.get_variable("AttnV_%d" % a,
                                           [attention_vec_size]))

#     state = initial_state

    def attention(query, use_attention=False):
      """Put attention masks on hidden using hidden_features and query."""
      attn_weights = []
      ds = []  # Results of attention reads will be stored here.
      for i in xrange(num_heads):
        with variable_scope.variable_scope("Attention_%d" % i):
          y = rnn_cell._linear(query, attention_vec_size, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
          # Attention mask is a softmax of v^T * tanh(...).
          s = math_ops.reduce_sum(
              v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
          if use_attention is False:  # apply mean pooling
            weights = tf.tile(sequence_length, tf.stack([attn_length]))
            weights = array_ops.reshape(weights, tf.shape(s))
            a = array_ops.ones(tf.shape(s), dtype=dtype) / math_ops.to_float(weights)
            # a = array_ops.ones(tf.shape(s), dtype=dtype) / math_ops.to_float(tf.shape(s)[1])
          else:
            a = nn_ops.softmax(s)
          attn_weights.append(a)
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
              [1, 2])
          ds.append(array_ops.reshape(d, [-1, attn_size]))
      return attn_weights, ds

    batch_attn_size = array_ops.stack([batch_size, attn_size])
    attns = [array_ops.zeros(batch_attn_size, dtype=dtype)
             for _ in xrange(num_heads)]
    for a in attns:  # Ensure the second shape of attention vectors is set.
      a.set_shape([None, attn_size])
    if initial_state_attention:
      attn_weights, attns = attention(initial_state, use_attention=use_attention)

    #with variable_scope.variable_scope(scope or "Linear"):
    matrix = variable_scope.get_variable("Out_Matrix", [attn_size, output_size])
    res = math_ops.matmul(attns[0], matrix) # NOTE: here we temporarily assume num_head = 1
    bias_start = 0.0
    bias_term = variable_scope.get_variable("Out_Bias", [output_size],
                                              initializer=init_ops.constant_initializer(bias_start))
    output = res + bias_term
  return attention_states, attn_weights[0], attns[0], [output] # NOTE: here we temporarily assume num_head = 1
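For intuition, a NumPy sketch of the scoring rule the comment above describes (s_j = v^T tanh(W1 h_j + W2 q), softmax, weighted read); shapes and weights are made up, and this is an illustration of the Bahdanau-style score, not the decoder itself:

import numpy as np

# Illustration of the attention score: s_j = v . tanh(W1 h_j + W2 q),
# a = softmax(s), d = sum_j a_j h_j.  Sizes are arbitrary.
rng = np.random.default_rng(0)
attn_length, attn_size = 4, 8
hidden = rng.standard_normal((attn_length, attn_size))  # the memory ("hidden")
query = rng.standard_normal(attn_size)                  # decoder state

W1 = rng.standard_normal((attn_size, attn_size))
W2 = rng.standard_normal((attn_size, attn_size))
v = rng.standard_normal(attn_size)

s = np.tanh(hidden @ W1 + query @ W2) @ v     # one score per memory slot
a = np.exp(s - s.max()); a /= a.sum()         # softmax over the slots
d = a @ hidden                                # attention-weighted read, shape (attn_size,)
print(a, d.shape)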
Example #49
0
 def even_s(off, size):
     off = array_ops.reshape(off, [-1, size//2, 2])
     off = array_ops.reshape(array_ops.reverse(off, [2]), [-1, size])
     return off
Example #50
0
 def loop_fn(i):
   x1 = array_ops.gather(x, i)
   return array_ops.reshape(x1, [-1]), array_ops.reshape(x1, [1, 3, 1, -1])
  def __call__(self, inputs, state, scope=None):
    # global h,e_ti,z_i,alpha_ti
    """Long short-term memory cell (LSTM)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      if self._state_is_tuple:
        c, h = state
      else:
        c, h = array_ops.split(1, 2, state)
      ## separate inputs into the word embedding and image subfeatures

      shape = inputs.get_shape().as_list()
      # print("shape ")
      # print(shape)
      batch_size = shape[0]
      print("inputs.get_shape[0]")
      print(batch_size)
      hsize=h.get_shape()
      print("hidden state length")
      print(hsize[1].value)
      #padded_length = shape[1].value
      single_input_length = shape[1]
      # print("single_input_length ")
      # print(single_input_length)
      word_imbedding_length = 512
      #subfeature_length = 192#(single_input_length-word_imbedding_length)/subfeature_num;
      subfeature_length = 768
      # subfeature_num = int((single_input_length-word_imbedding_length)/subfeature_length)
      # subfeature_num = 35*35
      subfeature_num = 17*17
      #batch_size_ops
      # batch_size=32
      tensorShape=tf.shape(inputs)
      #z_i = array_ops.zeros([batch_size,subfeature_length])

      z_i=array_ops.zeros(tf.pack([tensorShape[0],subfeature_length]))
      print('inputs:')
      print(inputs)
      # print("Initial z_i:")
      # print(z_i)
      state_length = self._num_units
      # with vs.variable_scope(scope or type(self).__name__,initializer=self._initializer):
      #f_att_matrix = vs.get_variable(name="f_att_matrix",shape = (subfeature_length,state_length), initializer=tf.contrib.layers.xavier_initializer(),dtype=tf.float32)
      mid_layer_size = 300
      W1 = vs.get_variable(name="w1",shape=(hsize[1].value+subfeature_length,mid_layer_size),initializer=tf.contrib.layers.xavier_initializer(),dtype=tf.float32)
      W2 = vs.get_variable(name="w2",shape=(mid_layer_size,1),initializer=tf.contrib.layers.xavier_initializer(),dtype=tf.float32)
      b1 = vs.get_variable(name="b1",shape=(1,mid_layer_size),initializer=tf.zeros_initializer,dtype=tf.float32)
      b2 = vs.get_variable(name="b2",shape=(1,1),initializer=tf.zeros_initializer,dtype=tf.float32)  
      word_imbeddings=inputs[:,0:word_imbedding_length]
      alpha_ti = []
      if single_input_length != word_imbedding_length:
        image_subfeatures=inputs[:,word_imbedding_length:single_input_length]
        #tf.summary.histogram("tensors/" + "subfeatures", image_subfeatures)
        #net2 = tf.reshape(net2, [shape2[0].value, -1, shape2[3].value])

        #image_subfeatures=array_ops.reshape(image_subfeatures,[batch_size,subfeature_num,subfeature_length])
        image_subfeatures=array_ops.reshape(image_subfeatures,tf.pack([tensorShape[0],subfeature_num,subfeature_length]))

        # f_att_matrix_exp=tf.expand_dims(f_att_matrix,0)
        # f_att_matrix_tile=tf.tile(f_att_matrix_exp,tf.pack([batch_size,1,1]))
        # print("fatt,fatt_exp,fatt_tile")
        # print(f_att_matrix)
        # print(f_att_matrix_exp)
        # print(f_att_matrix_tile)
        # tf.Print(f_att_matrix,[f_att_matrix])
        # h=tf.expand_dims(h,2)
        # e_ti = math_ops.matmul(math_ops.matmul(tf.sigmoid(image_subfeatures),f_att_matrix_tile),h)
        # e_ti =array_ops.zeros([batch_size,subfeature_num])
 
        W1_matrix=tf.expand_dims(W1,0) #[1,state_length+subfeature_length,mid_layer_size]
        W1_matrix=tf.tile(W1_matrix,tf.pack([tensorShape[0],1,1])) #[batchsize,state_length+subfeature_length,mid_layer_size]
        W2_matrix=tf.expand_dims(W2,0)
        W2_matrix=tf.tile(W2_matrix,tf.pack([tensorShape[0],1,1]))
        b1_matrix=tf.expand_dims(b1,0) #[1,1,mid_layer_size]     
        b1_matrix=tf.tile(b1_matrix,tf.pack([tensorShape[0],1,1]))
        b2_matrix=tf.expand_dims(b2,0) #[1,1,mid_layer_size]     
        b2_matrix=tf.tile(b2_matrix,tf.pack([tensorShape[0],1,1]))

        h_matrix=tf.expand_dims(h,1) # [batchsize,1,state_length]
        h_matrix=tf.tile(h_matrix,[1,subfeature_num,1]) #[batchsize,subfeature_num,state_length]
        x1 = tf.concat(2,[h_matrix,image_subfeatures]) #[batchsize,subfeature_num,state_length+subfeature_length]
        #x2 = tf.tanh(math_ops.matmul(x1,W1_matrix)+b1_matrix) #[batchsize,subfeature_num,mid_layer_size]
        x2 = tf.nn.relu(math_ops.matmul(x1,W1_matrix)+b1_matrix) #[batchsize,subfeature_num,mid_layer_size]
        #e_ti = tf.tanh(math_ops.matmul(x2,W2_matrix)+b2_matrix) #[batchsize,subfeature_num,1]
        e_ti = tf.nn.relu(math_ops.matmul(x2,W2_matrix)+b2_matrix) #[batchsize,subfeature_num,1]
        #e_ti = tf.squeeze(e_ti,[2]) #[batchsize,subfeature_num]
        alpha_ti = nn_ops.softmax(e_ti,dim=1) #[batchsize,subfeature_num,1]

        # e_ti=[]
        # for i in range(subfeature_num):
        #   x1 = tf.concat(1,[h,image_subfeatures[:,i,:]])
        #   x2 = tf.tanh(math_ops.matmul(x1,W1)+b1)
        #   x3 = tf.tanh(math_ops.matmul(x2,W2)+b2)
        #   e_ti.append(x3)
        # # e_ti = self.f_att(image_subfeatures,subfeature_length,h,scope)
        # print("x1")
        # print(x1)
        # print("x2")
        # print(x2)
        # print("x3")
        # print(x3)
        # e_ti=tf.transpose(tf.pack(e_ti),[1,0,2])
        # print("e_ti")
        # print(e_ti)
        # alpha_ti = nn_ops.softmax(e_ti)


        #tf.summary.histogram("tensors/" + "alpha_ti", alpha_ti)
        # z_i = math_ops.reduce_sum(math_ops.matmul(tf.transpose(image_subfeatures,[0,2,1]),alpha_ti),axis=1)
        z_i = math_ops.matmul(tf.transpose(image_subfeatures,[0,2,1]),alpha_ti)
        # h=tf.squeeze(h,[2])
        z_i=tf.squeeze(z_i,squeeze_dims=[2])
        print("squeezed z_i")
        print(z_i)
        #tf.summary.histogram("tensors/" + "z_i", z_i)
      #tf.summary.histogram("tensors/" + "h", h)
      concat = _linear([word_imbeddings, h, z_i], 4 * self._num_units, True) ###
      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      i, j, f, o = array_ops.split(1, 4, concat)

      new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
               self._activation(j))
      new_h = self._activation(new_c) * sigmoid(o)

      if self._state_is_tuple:
        new_state = LSTMStateTuple(new_c, new_h)
      else:
        new_state = array_ops.concat(1, [new_c, new_h])
      return new_h, new_state, alpha_ti, z_i, word_imbeddings
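An illustrative NumPy re-statement of the attention step inside this cell (a two-layer ReLU MLP on [h, feature_j] gives the score e_j, a softmax over spatial positions gives alpha, and the context z is the alpha-weighted feature sum); the sizes here are made up, not the ones used in the cell:

import numpy as np

# e_j = relu(relu([h, f_j] W1 + b1) W2 + b2); alpha = softmax(e); z = sum_j alpha_j f_j.
rng = np.random.default_rng(1)
batch, num_feat, feat_len, state_len, mid = 2, 5, 6, 4, 3

feats = rng.standard_normal((batch, num_feat, feat_len))
h = rng.standard_normal((batch, state_len))

W1 = rng.standard_normal((state_len + feat_len, mid)); b1 = np.zeros(mid)
W2 = rng.standard_normal((mid, 1)); b2 = np.zeros(1)

h_tiled = np.repeat(h[:, None, :], num_feat, axis=1)            # (batch, num_feat, state_len)
x1 = np.concatenate([h_tiled, feats], axis=2)                   # (batch, num_feat, state+feat)
x2 = np.maximum(x1 @ W1 + b1, 0.0)                              # (batch, num_feat, mid)
e = np.maximum(x2 @ W2 + b2, 0.0)                               # (batch, num_feat, 1)
alpha = np.exp(e - e.max(axis=1, keepdims=True))
alpha /= alpha.sum(axis=1, keepdims=True)                       # softmax over positions
z = np.squeeze(np.transpose(feats, (0, 2, 1)) @ alpha, axis=2)  # (batch, feat_len)
print(z.shape)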
Example #52
0
 def modify(off_normal, dist, normal_size):
     off_normal = array_ops.reshape(
         array_ops.reverse(
             array_ops.reshape(
                 off_normal,
                 [-1, normal_size // (2 ** dist), 2, (2 ** (dist - 1))]),
             [2]),
         [-1, normal_size])
     return off_normal
Example #53
0
def _ReshapeGrad(op, grad):
    return [
        array_ops.reshape(_IndexedSlicesToTensorNoWarning(grad),
                          array_ops.shape(op.inputs[0])), None
    ]
Example #54
0
def _eunn_param(hidden_size, capacity=2, fft=False, comp=True, name=None):
    """
    Create parameters and do the initial preparations
    """
    theta_phi_initializer = init_ops.random_uniform_initializer(-np.pi, np.pi)
    if fft:
        capacity = int(np.ceil(np.log2(hidden_size)))

        diag_list_0 = []
        off_list_0 = []
        varsize = 0
        for i in range(capacity):
            size = capacity - i
            normal_size = (hidden_size // (2 ** size)) * (2 ** (size - 1))
            extra_size = max(0, (hidden_size % (2 ** size)) - (2 ** (size - 1)))
            varsize += normal_size + extra_size

        params_theta = vs.get_variable(name+"theta_0", [varsize], initializer=theta_phi_initializer)
        cos_theta = math_ops.cos(params_theta)
        sin_theta = math_ops.sin(params_theta)

        if comp:
            params_phi = vs.get_variable(name+"phi_0", [varsize], initializer=theta_phi_initializer)
            cos_phi = math_ops.cos(params_phi)
            sin_phi = math_ops.sin(params_phi)

            cos_list_0 = math_ops.complex(cos_theta, array_ops.zeros_like(cos_theta))
            cos_list_1 = math_ops.complex(math_ops.multiply(cos_theta, cos_phi), math_ops.multiply(cos_theta, sin_phi))
            sin_list_0 = math_ops.complex(sin_theta, array_ops.zeros_like(sin_theta))
            sin_list_1 = math_ops.complex(-math_ops.multiply(sin_theta, cos_phi), -math_ops.multiply(sin_theta, sin_phi))

        last = 0
        for i in range(capacity):
            size = capacity - i
            normal_size = (hidden_size // (2 ** size)) * (2 ** (size - 1))
            extra_size = max(0, (hidden_size % (2 ** size)) - (2 ** (size - 1)))

            if comp:
                cos_list_normal = array_ops.concat([array_ops.slice(cos_list_0, [last], [normal_size]), array_ops.slice(cos_list_1, [last], [normal_size])], 0)
                sin_list_normal = array_ops.concat([array_ops.slice(sin_list_0, [last], [normal_size]), -array_ops.slice(sin_list_1, [last], [normal_size])], 0)
                last += normal_size

                cos_list_extra = array_ops.concat([array_ops.slice(cos_list_0, [last], [extra_size]), math_ops.complex(tf.ones([hidden_size - 2*normal_size - 2*extra_size]), tf.zeros([hidden_size - 2*normal_size - 2*extra_size])), array_ops.slice(cos_list_1, [last], [extra_size])], 0)
                sin_list_extra = array_ops.concat([array_ops.slice(sin_list_0, [last], [extra_size]), math_ops.complex(tf.zeros([hidden_size - 2*normal_size - 2*extra_size]), tf.zeros([hidden_size - 2*normal_size - 2*extra_size])), -array_ops.slice(sin_list_1, [last], [extra_size])], 0)
                last += extra_size

            else:
                cos_list_normal = array_ops.slice(cos_theta, [last], [normal_size])
                cos_list_normal = array_ops.concat([cos_list_normal, cos_list_normal], 0)
                cos_list_extra = array_ops.slice(cos_theta, [last+normal_size], [extra_size])
                cos_list_extra = array_ops.concat([cos_list_extra, tf.ones([hidden_size - 2*normal_size - 2*extra_size]), cos_list_extra], 0)

                sin_list_normal = array_ops.slice(sin_theta, [last], [normal_size])
                sin_list_normal = array_ops.concat([sin_list_normal, -sin_list_normal], 0)
                sin_list_extra = array_ops.slice(sin_theta, [last+normal_size], [extra_size])
                sin_list_extra = array_ops.concat([sin_list_extra, tf.zeros([hidden_size - 2*normal_size - 2*extra_size]), -sin_list_extra], 0)

                last += normal_size + extra_size

            if normal_size != 0:
                cos_list_normal = array_ops.reshape(array_ops.transpose(array_ops.reshape(cos_list_normal, [-1, 2*normal_size//(2**size)])), [-1])
                sin_list_normal = array_ops.reshape(array_ops.transpose(array_ops.reshape(sin_list_normal, [-1, 2*normal_size//(2**size)])), [-1])

            cos_list = array_ops.concat([cos_list_normal, cos_list_extra], 0)
            sin_list = array_ops.concat([sin_list_normal, sin_list_extra], 0)
            diag_list_0.append(cos_list)
            off_list_0.append(sin_list)

        diag_vec = array_ops.stack(diag_list_0, 0)
        off_vec = array_ops.stack(off_list_0, 0)

    else:
        capacity_b = capacity//2
        capacity_a = capacity - capacity_b

        hidden_size_a = hidden_size//2
        hidden_size_b = (hidden_size-1)//2

        params_theta_0 = vs.get_variable(name+"theta_0", [capacity_a, hidden_size_a], initializer=theta_phi_initializer)
        cos_theta_0 = array_ops.reshape(math_ops.cos(params_theta_0), [capacity_a, -1, 1])
        sin_theta_0 = array_ops.reshape(math_ops.sin(params_theta_0), [capacity_a, -1, 1])

        params_theta_1 = vs.get_variable(name+"theta_1", [capacity_b, hidden_size_b], initializer=theta_phi_initializer)
        cos_theta_1 = array_ops.reshape(math_ops.cos(params_theta_1), [capacity_b, -1, 1])
        sin_theta_1 = array_ops.reshape(math_ops.sin(params_theta_1), [capacity_b, -1, 1])

        if comp:
            params_phi_0 = vs.get_variable(name+"phi_0", [capacity_a, hidden_size_a], initializer=theta_phi_initializer)
            cos_phi_0 = array_ops.reshape(math_ops.cos(params_phi_0), [capacity_a, -1, 1])
            sin_phi_0 = array_ops.reshape(math_ops.sin(params_phi_0), [capacity_a, -1, 1])

            cos_list_0_re = array_ops.reshape(array_ops.concat([cos_theta_0, math_ops.multiply(cos_theta_0, cos_phi_0)], 2), [capacity_a, -1])
            cos_list_0_im = array_ops.reshape(array_ops.concat([array_ops.zeros_like(cos_theta_0), math_ops.multiply(cos_theta_0, sin_phi_0)], 2), [capacity_a, -1])
            if hidden_size_a*2 != hidden_size:
                cos_list_0_re = array_ops.concat([cos_list_0_re, tf.ones([capacity_a, 1])], 1)
                cos_list_0_im = array_ops.concat([cos_list_0_im, tf.zeros([capacity_a, 1])], 1)
            cos_list_0 = math_ops.complex(cos_list_0_re, cos_list_0_im)

            sin_list_0_re = array_ops.reshape(array_ops.concat([sin_theta_0, - math_ops.multiply(sin_theta_0, cos_phi_0)], 2), [capacity_a, -1])
            sin_list_0_im = array_ops.reshape(array_ops.concat([array_ops.zeros_like(sin_theta_0), - math_ops.multiply(sin_theta_0, sin_phi_0)], 2), [capacity_a, -1])
            if hidden_size_a*2 != hidden_size:
                sin_list_0_re = array_ops.concat([sin_list_0_re, tf.zeros([capacity_a, 1])], 1)
                sin_list_0_im = array_ops.concat([sin_list_0_im, tf.zeros([capacity_a, 1])], 1)
            sin_list_0 = math_ops.complex(sin_list_0_re, sin_list_0_im)

            params_phi_1 = vs.get_variable(name+"phi_1", [capacity_b, hidden_size_b], initializer=theta_phi_initializer)
            cos_phi_1 = array_ops.reshape(math_ops.cos(params_phi_1), [capacity_b, -1, 1])
            sin_phi_1 = array_ops.reshape(math_ops.sin(params_phi_1), [capacity_b, -1, 1])

            cos_list_1_re = array_ops.reshape(array_ops.concat([cos_theta_1, math_ops.multiply(cos_theta_1, cos_phi_1)], 2), [capacity_b, -1])
            cos_list_1_re = array_ops.concat([tf.ones((capacity_b, 1)), cos_list_1_re], 1)
            cos_list_1_im = array_ops.reshape(array_ops.concat([array_ops.zeros_like(cos_theta_1), math_ops.multiply(cos_theta_1, sin_phi_1)], 2), [capacity_b, -1])
            cos_list_1_im = array_ops.concat([tf.zeros((capacity_b, 1)), cos_list_1_im], 1)
            if hidden_size_b*2 != hidden_size-1:
                cos_list_1_re = array_ops.concat([cos_list_1_re, tf.ones([capacity_b, 1])], 1)
                cos_list_1_im = array_ops.concat([cos_list_1_im, tf.zeros([capacity_b, 1])], 1)
            cos_list_1 = math_ops.complex(cos_list_1_re, cos_list_1_im)

            sin_list_1_re = array_ops.reshape(array_ops.concat([sin_theta_1, -math_ops.multiply(sin_theta_1, cos_phi_1)], 2), [capacity_b, -1])
            sin_list_1_re = array_ops.concat([tf.zeros((capacity_b, 1)), sin_list_1_re], 1)
            sin_list_1_im = array_ops.reshape(array_ops.concat([array_ops.zeros_like(sin_theta_1), -math_ops.multiply(sin_theta_1, sin_phi_1)], 2), [capacity_b, -1])
            sin_list_1_im = array_ops.concat([tf.zeros((capacity_b, 1)), sin_list_1_im], 1)
            if hidden_size_b*2 != hidden_size-1:
                sin_list_1_re = array_ops.concat([sin_list_1_re, tf.zeros([capacity_b, 1])], 1)
                sin_list_1_im = array_ops.concat([sin_list_1_im, tf.zeros([capacity_b, 1])], 1)
            sin_list_1 = math_ops.complex(sin_list_1_re, sin_list_1_im)
        else:
            cos_list_0 = array_ops.reshape(array_ops.concat([cos_theta_0, cos_theta_0], 2), [capacity_a, -1])
            sin_list_0 = array_ops.reshape(array_ops.concat([sin_theta_0, -sin_theta_0], 2), [capacity_a, -1])
            if hidden_size_a*2 != hidden_size:
                cos_list_0 = array_ops.concat([cos_list_0, tf.ones([capacity_a, 1])], 1)
                sin_list_0 = array_ops.concat([sin_list_0, tf.zeros([capacity_a, 1])], 1)

            cos_list_1 = array_ops.reshape(array_ops.concat([cos_theta_1, cos_theta_1], 2), [capacity_b, -1])
            cos_list_1 = array_ops.concat([tf.ones((capacity_b, 1)), cos_list_1], 1)
            sin_list_1 = array_ops.reshape(array_ops.concat([sin_theta_1, -sin_theta_1], 2), [capacity_b, -1])
            sin_list_1 = array_ops.concat([tf.zeros((capacity_b, 1)), sin_list_1], 1)
            if hidden_size_b*2 != hidden_size-1:
                cos_list_1 = array_ops.concat([cos_list_1, tf.zeros([capacity_b, 1])], 1)
                sin_list_1 = array_ops.concat([sin_list_1, tf.zeros([capacity_b, 1])], 1)

        if capacity_b != capacity_a:
            if comp:
                cos_list_1 = array_ops.concat([cos_list_1, math_ops.complex(tf.zeros([1, hidden_size]), tf.zeros([1, hidden_size]))], 0)
                sin_list_1 = array_ops.concat([sin_list_1, math_ops.complex(tf.zeros([1, hidden_size]), tf.zeros([1, hidden_size]))], 0)
            else:
                cos_list_1 = array_ops.concat([cos_list_1, tf.zeros([1, hidden_size])], 0)
                sin_list_1 = array_ops.concat([sin_list_1, tf.zeros([1, hidden_size])], 0)

        diag_vec = tf.reshape(tf.concat([cos_list_0, cos_list_1], 1), [capacity_a*2, hidden_size])
        off_vec = tf.reshape(tf.concat([sin_list_0, sin_list_1], 1), [capacity_a*2, hidden_size])

        if capacity_b != capacity_a:
            diag_vec = tf.slice(diag_vec, [0, 0], [capacity, hidden_size])
            off_vec = tf.slice(off_vec, [0, 0], [capacity, hidden_size])

    def _toTensorArray(elems):

        elems = ops.convert_to_tensor(elems)
        n = array_ops.shape(elems)[0]
        elems_ta = tensor_array_ops.TensorArray(dtype=elems.dtype, size=n, dynamic_size=False, infer_shape=True, clear_after_read=False)
        elems_ta = elems_ta.unstack(elems)
        return elems_ta

    diag_vec = _toTensorArray(diag_vec)
    off_vec = _toTensorArray(off_vec)
    if comp:
        omega = vs.get_variable(name+"omega", [hidden_size], initializer=theta_phi_initializer)
        diag = math_ops.complex(math_ops.cos(omega), math_ops.sin(omega))
    else:
        diag = None

    return diag_vec, off_vec, diag, capacity
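Each (cos θ, sin θ) pair generated above parameterises one 2×2 plane rotation. A small NumPy sketch of that idea (real-valued case, a single "even" layer; the neighbour-swap permutation applied by the surrounding EUNN loop, which is not shown here, is assumed), built the same way as cos_list_0 / sin_list_0 above:

import numpy as np

theta = np.array([0.3, 1.1, -0.7])                      # one angle per coordinate pair
cos_list = np.repeat(np.cos(theta), 2)                  # [c1, c1, c2, c2, c3, c3]
sin_list = np.stack([np.sin(theta), -np.sin(theta)], 1).reshape(-1)  # [s1, -s1, s2, -s2, ...]

h = np.arange(6, dtype=np.float64)
h_swapped = h.reshape(-1, 2)[:, ::-1].reshape(-1)       # swap neighbours: [h1, h0, h3, h2, ...]
h_rot = cos_list * h + sin_list * h_swapped             # apply all 2x2 rotations at once

# The layer is orthogonal, so the norm of the hidden vector is preserved.
print(np.allclose(np.linalg.norm(h), np.linalg.norm(h_rot)))  # True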
Example #55
0
def _ExtractVolumePatchesGrad(op, grad):
    batch_size, planes_in, rows_in, cols_in, channels = [
        dim.value for dim in op.inputs[0].shape.dims
    ]
    input_bphwc = array_ops.shape(op.inputs[0])
    batch_size = input_bphwc[0]
    channels = input_bphwc[4]

    # Create indices matrix for input tensor.
    # Note that 0 is preserved for padding location,
    # so indices for input run from 1 to planes_in * rows_in * cols_in.
    input_indices_num = 1 + planes_in * rows_in * cols_in
    input_idx = array_ops.reshape(
        math_ops.range(1, input_indices_num, dtype=ops.dtypes.int64),
        (1, planes_in, rows_in, cols_in, 1))
    input_idx_patched = gen_array_ops.extract_volume_patches(
        input_idx, op.get_attr("ksizes"), op.get_attr("strides"),
        op.get_attr("padding"))

    # Create indices matrix for output tensor.
    _, planes_out, rows_out, cols_out, _ = [
        dim.value for dim in op.outputs[0].shape.dims
    ]
    _, ksize_p, ksize_r, ksize_c, _ = op.get_attr("ksizes")
    # Indices for output start from 0.
    prc_indices_num = planes_out * rows_out * cols_out
    output_indices_num = prc_indices_num * ksize_p * ksize_r * ksize_c
    output_idx = array_ops.reshape(
        math_ops.range(output_indices_num, dtype=ops.dtypes.int64),
        (1, planes_out, rows_out, cols_out, ksize_p * ksize_r * ksize_c))

    # Construct mapping table for indices: (input -> output).
    idx_matrix = array_ops.concat([
        array_ops.expand_dims(input_idx_patched, axis=-1),
        array_ops.expand_dims(output_idx, axis=-1)
    ],
                                  axis=-1)
    idx_map = array_ops.reshape(idx_matrix, (-1, 2))

    sp_shape = (input_indices_num, output_indices_num)
    sp_mat_full = sparse_tensor.SparseTensor(
        idx_map, array_ops.ones([output_indices_num], dtype=grad.dtype),
        sp_shape)
    # Remove all padding locations [0, :].
    sp_mat = sparse_ops.sparse_slice(
        sp_mat_full, (1, 0), (input_indices_num - 1, output_indices_num))

    grad_expanded = array_ops.transpose(
        array_ops.reshape(_IndexedSlicesToTensorNoWarning(grad),
                          (batch_size, planes_out, rows_out, cols_out, ksize_p,
                           ksize_r, ksize_c, channels)),
        (1, 2, 3, 4, 5, 6, 0, 7))
    grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels))

    jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat)

    grad_out = array_ops.reshape(
        jac, (planes_in, rows_in, cols_in, batch_size, channels))
    grad_out = array_ops.transpose(grad_out, (3, 0, 1, 2, 4))

    return [grad_out]
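A simplified 1-D NumPy analogue (illustrative only, not the op above) of what the sparse matmul accomplishes: every input position accumulates the upstream gradient of each patch element it contributed to:

import numpy as np

# Input of length 5, patches of size 3, stride 1, VALID padding -> 3 patches.
n_in, ksize, stride = 5, 3, 1
patch_starts = list(range(0, n_in - ksize + 1, stride))
grad = np.ones((len(patch_starts), ksize))   # made-up upstream gradient, one row per patch

grad_in = np.zeros(n_in)
for p, start in enumerate(patch_starts):
    # Each input position accumulates the gradient of every patch element it fed,
    # which is the same accumulation the sparse matmul above performs in one shot.
    grad_in[start:start + ksize] += grad[p]
print(grad_in)   # [1. 2. 3. 2. 1.]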
Example #56
0
def _ReshapeToInput(op, grad):
    """Reshapes the gradient to the shape of the original input."""
    return array_ops.reshape(_IndexedSlicesToTensorNoWarning(grad),
                             array_ops.shape(op.inputs[0]))
Example #57
0
 def loss(x):
   y = array_ops.reshape(
       gen_math_ops.mat_mul(x, kernel), []) - array_ops.identity(1.)
   return y * y
Example #58
0
def _GatherV2Grad(op, grad):
    """Gradient for GatherV2 op."""
    # params can be large, so colocate the shape calculation with it.
    #
    # params can be very large for sparse model, array_ops.shape raises
    # exception on the Windows platform when any dimension is larger than
    # int32. params_shape is not used in optimizer apply_sparse gradients,
    # so it's fine to convert it back to int32 regardless of truncation.
    params = op.inputs[0]
    with ops.colocate_with(params):
        params_shape = array_ops.shape(params, out_type=ops.dtypes.int64)
        params_shape = math_ops.cast(params_shape, dtypes.int32)

    indices = op.inputs[1]
    indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
    axis = op.inputs[2]
    axis_static = tensor_util.constant_value(axis)
    batch_dims = int(op.get_attr("batch_dims"))

    if batch_dims < 0:
        batch_dims += indices.shape.ndims

    # For axis 0 gathers, build an appropriately shaped IndexedSlices.
    if axis_static == 0:
        if context.executing_eagerly():
            with ops.device("/cpu:0"):
                params_tail_shape = array_ops.identity(params_shape)[1:]
        else:
            params_tail_shape = params_shape[1:]
        values_shape = array_ops.concat([indices_size, params_tail_shape], 0)
        values = array_ops.reshape(_IndexedSlicesToTensorNoWarning(grad),
                                   values_shape)
        indices = array_ops.reshape(indices, indices_size)
        params_grad = ops.IndexedSlices(values, indices, params_shape)
    else:
        # Handle axis by transposing the axis dimension to be the first non-batch
        # dimension, compute the gradient and transpose the result back.
        outer_shape = params_shape[:axis]
        inner_shape = params_shape[axis:][1:]
        values_shape = array_ops.concat([outer_shape, [-1], inner_shape], 0)

        values_dims = array_ops.size(values_shape)
        axis_dims = array_ops.size(outer_shape)

        outer_batches_indices = math_ops.range(batch_dims)
        batch_axis_indices = math_ops.range(batch_dims, axis_dims)
        inner_axes_indices = math_ops.range(axis_dims + 1, values_dims)

        values = array_ops.reshape(_IndexedSlicesToTensorNoWarning(grad),
                                   values_shape)

        # Move values[axis] up to values[batch_dims]
        transpose_dims = array_ops.concat([
            outer_batches_indices, [axis_dims], batch_axis_indices,
            inner_axes_indices
        ], 0)
        values_transpose = array_ops.transpose(values, transpose_dims)

        params_grad = _BatchGatherGrad(params_shape, values_transpose, indices,
                                       batch_dims, params_shape[axis])

        # Inverts the above transpose by moving dimension batch_dims back to its
        # original position.
        invert_transpose_dims = array_ops.concat([
            outer_batches_indices, batch_axis_indices + 1, [batch_dims],
            inner_axes_indices
        ], 0)
        params_grad = array_ops.transpose(params_grad, invert_transpose_dims)

    return [params_grad, None, None]
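For the `axis == 0` branch, the `IndexedSlices` built above is the sparse form of a scatter-add of the gradient rows back into a `params`-shaped zero tensor; a NumPy illustration with made-up values:

import numpy as np

# d gather(params, indices) / d params is a scatter-add of the upstream
# gradient rows at those indices.
params_shape = (4, 3)
indices = np.array([2, 0, 2])                  # note the repeated index
grad = np.ones((3, 3))                         # upstream gradient, one row per gathered row

params_grad = np.zeros(params_shape)
np.add.at(params_grad, indices, grad)          # dense equivalent of the IndexedSlices
print(params_grad)
# Row 0 receives one row of grad; row 2 receives two (the repeated index accumulates).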
def _vec(x):
    """Stacks column of matrix to form a single column."""
    return array_ops.reshape(
        array_ops.matrix_transpose(x),
        array_ops.concat([array_ops.shape(x)[:-2], [-1]], axis=0))
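A tiny NumPy worked example of the column-stacking convention `_vec` implements: vec([[1, 2], [3, 4]]) = [1, 3, 2, 4]:

import numpy as np

# Transposing the trailing 2-D block and flattening its last two dims stacks
# the columns; any leading batch dims are left untouched.
x = np.array([[1., 2.],
              [3., 4.]])
vec_x = np.swapaxes(x, -1, -2).reshape(x.shape[:-2] + (-1,))
print(vec_x)   # [1. 3. 2. 4.]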
Example #60
0
def histogram_fixed_width_bins(values,
                               value_range,
                               nbins=100,
                               dtype=dtypes.int32,
                               name=None):
    """Bins the given values for use in a histogram.

  Given the tensor `values`, this operation returns a rank 1 `Tensor`
  representing the indices of a histogram into which each element
  of `values` would be binned. The bins are equal width and
  determined by the arguments `value_range` and `nbins`.

  Args:
    values:  Numeric `Tensor`.
    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
      values <= value_range[0] are mapped to bin 0,
      values >= value_range[1] are mapped to bin nbins - 1.
    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
    dtype:  dtype for returned histogram.
    name:  A name for this operation (defaults to 'histogram_fixed_width_bins').

  Returns:
    A `Tensor` holding the indices of the binned values whose shape matches
    `values`.

  Examples:

  ```python
  # Bins will be:  (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf)
  nbins = 5
  value_range = [0.0, 5.0]
  new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

  with tf.get_default_session() as sess:
    indices = tf.histogram_fixed_width_bins(new_values, value_range, nbins=5)
    variables.global_variables_initializer().run()
    sess.run(indices) => [0, 0, 1, 2, 4, 4]
  ```
  """
    with ops.name_scope(name, 'histogram_fixed_width_bins',
                        [values, value_range, nbins]):
        values = ops.convert_to_tensor(values, name='values')
        shape = array_ops.shape(values)

        values = array_ops.reshape(values, [-1])
        value_range = ops.convert_to_tensor(value_range, name='value_range')
        nbins = ops.convert_to_tensor(nbins, dtype=dtypes.int32, name='nbins')
        nbins_float = math_ops.cast(nbins, values.dtype)

        # Map tensor values that fall within value_range to [0, 1].
        scaled_values = math_ops.truediv(values - value_range[0],
                                         value_range[1] - value_range[0],
                                         name='scaled_values')

        # Map tensor values within the half-open interval [value_range[0],
        # value_range[1]) to {0, ..., nbins-1}; values outside that interval
        # come out as zero or less, or as nbins or more.
        indices = math_ops.floor(nbins_float * scaled_values, name='indices')

        # Clip edge cases (e.g. value = value_range[1]) or "outliers."
        indices = math_ops.cast(
            clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)
        return array_ops.reshape(indices, shape)
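An illustrative NumPy re-statement of the binning arithmetic above, reproducing the docstring example:

import numpy as np

# Scale into [0, 1], floor into nbins buckets, then clip outliers into the edge bins.
values = np.array([-1.0, 0.0, 1.5, 2.0, 5.0, 15.0])
value_range = [0.0, 5.0]
nbins = 5

scaled = (values - value_range[0]) / (value_range[1] - value_range[0])
indices = np.clip(np.floor(nbins * scaled), 0, nbins - 1).astype(np.int32)
print(indices)   # [0 0 1 2 4 4]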