def filter_functions():
    take_all = lambda x: constant_op.constant(True)
    is_zero = lambda x: math_ops.equal(x, 0)
    greater = lambda x: math_ops.greater(x + 5, 0)

    tests = []
    filters = [take_all, is_zero, greater]
    identity = lambda x: x
    for x, predicate_1 in enumerate(filters):
      for y, predicate_2 in enumerate(filters):
        tests.append(("Mixed{}{}".format(x, y), identity,
                      [predicate_1, predicate_2]))
        for z, predicate_3 in enumerate(filters):
          tests.append(("Mixed{}{}{}".format(x, y, z), identity,
                        [predicate_1, predicate_2, predicate_3]))

    take_all_multiple = lambda x, y: constant_op.constant(True)
    # Multi output
    tests.append(("Multi1", lambda x: (x, x),
                  [take_all_multiple, take_all_multiple]))
    tests.append(("Multi2", lambda x: (x, 2), [
        take_all_multiple,
        lambda x, y: math_ops.equal(x * math_ops.cast(y, dtypes.int64), 0)
    ]))
    return tuple(tests)
Example #2
 def _process_labels(self, labels):
   if isinstance(labels, sparse_tensor.SparseTensor):
     if labels.dtype == dtypes.string:
       label_ids_values = lookup_ops.index_table_from_tensor(
           vocabulary_list=tuple(self._label_vocabulary),
           name='class_id_lookup').lookup(labels.values)
       label_ids = sparse_tensor.SparseTensor(
           indices=labels.indices,
           values=label_ids_values,
           dense_shape=labels.dense_shape)
     else:
       label_ids = labels
     return math_ops.to_int64(
         sparse_ops.sparse_to_indicator(label_ids, self._n_classes))
   msg = ('labels shape must be [batch_size, {}]. '
          'Given: ').format(self._n_classes)
   labels_shape = array_ops.shape(labels)
   check_rank_op = control_flow_ops.Assert(
       math_ops.equal(array_ops.rank(labels), 2),
       data=[msg, labels_shape])
   check_label_dim = control_flow_ops.Assert(
       math_ops.equal(labels_shape[-1], self._n_classes),
       data=[msg, labels_shape])
   with ops.control_dependencies([check_rank_op, check_label_dim]):
     return array_ops.identity(labels)
def rot90(image, k=1, name=None):
  """Rotate an image counter-clockwise by 90 degrees.

  Args:
    image: A 3-D tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90 degrees.
    name: A name for this operation (optional).

  Returns:
    A rotated 3-D tensor of the same type and shape as `image`.
  """
  with ops.name_scope(name, 'rot90', [image, k]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    _Check3DImage(image, require_static=False)
    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
    k.get_shape().assert_has_rank(0)
    k = math_ops.mod(k, 4)

    def _rot90():
      return array_ops.transpose(array_ops.reverse_v2(image, [1]),
                                 [1, 0, 2])
    def _rot180():
      return array_ops.reverse_v2(image, [0, 1])
    def _rot270():
      return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]),
                                  [1])
    cases = [(math_ops.equal(k, 1), _rot90),
             (math_ops.equal(k, 2), _rot180),
             (math_ops.equal(k, 3), _rot270)]

    ret = control_flow_ops.case(cases, default=lambda: image, exclusive=True,
                                name=scope)
    ret.set_shape([None, None, image.get_shape()[2]])
    return ret
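A quick usage sketch (not from the original source; it assumes the public
tf.image.rot90 wrapper mirrors the implementation above, TF2 eager mode):

import tensorflow as tf

image = tf.reshape(tf.range(12, dtype=tf.float32), [2, 3, 2])  # HxWxC = 2x3x2
rotated = tf.image.rot90(image, k=1)
print(rotated.shape)  # (3, 2, 2) -- an odd k swaps height and width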
  def map_and_filter_functions():
    identity = lambda x: x
    increment = lambda x: x + 1
    minus_five = lambda x: x - 5

    def increment_and_square(x):
      y = x + 1
      return y * y

    take_all = lambda x: constant_op.constant(True)
    is_zero = lambda x: math_ops.equal(x, 0)
    is_odd = lambda x: math_ops.equal(x % 2, 1)
    greater = lambda x: math_ops.greater(x + 5, 0)

    functions = [identity, increment, minus_five, increment_and_square]
    filters = [take_all, is_zero, is_odd, greater]
    tests = []

    for x, fun in enumerate(functions):
      for y, predicate in enumerate(filters):
        tests.append(("Mixed{}{}".format(x, y), fun, predicate))

    # Multi output
    tests.append(("Multi1", lambda x: (x, x),
                  lambda x, y: constant_op.constant(True)))
    tests.append(
        ("Multi2", lambda x: (x, 2),
         lambda x, y: math_ops.equal(x * math_ops.cast(y, dtypes.int64), 0)))
    return tuple(tests)
Example #5
 def _process_labels(self, labels):
   if labels is None:
     raise ValueError(
         'You must provide a labels Tensor. Given: None. '
         'Suggested troubleshooting steps: Check that your data contain '
         'your label feature. Check that your input_fn properly parses and '
         'returns labels.')
   if isinstance(labels, sparse_tensor.SparseTensor):
     if labels.dtype == dtypes.string:
       label_ids_values = lookup_ops.index_table_from_tensor(
           vocabulary_list=tuple(self._label_vocabulary),
           name='class_id_lookup').lookup(labels.values)
       label_ids = sparse_tensor.SparseTensor(
           indices=labels.indices,
           values=label_ids_values,
           dense_shape=labels.dense_shape)
     else:
       label_ids = labels
     return math_ops.to_int64(
         sparse_ops.sparse_to_indicator(label_ids, self._n_classes))
   msg = ('labels shape must be [batch_size, {}]. '
          'Given: ').format(self._n_classes)
   labels_shape = array_ops.shape(labels)
   check_rank_op = control_flow_ops.Assert(
       math_ops.equal(array_ops.rank(labels), 2),
       data=[msg, labels_shape])
   check_label_dim = control_flow_ops.Assert(
       math_ops.equal(labels_shape[-1], self._n_classes),
       data=[msg, labels_shape])
   with ops.control_dependencies([check_rank_op, check_label_dim]):
     return array_ops.identity(labels)
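A hand-worked sketch of the indicator conversion above (assumption:
tf.sparse.to_indicator is the public mirror of sparse_ops.sparse_to_indicator):

import tensorflow as tf

# Class-id labels per row become a multi-hot [batch_size, n_classes] matrix.
labels = tf.ragged.constant([[0, 2], [1]]).to_sparse()
indicator = tf.sparse.to_indicator(labels, vocab_size=3)
print(tf.cast(indicator, tf.int64).numpy())  # [[1 0 1]
                                             #  [0 1 0]]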
  def _decode(self, image_buffer, image_format):
    """Decodes the image buffer.

    Args:
      image_buffer: The tensor representing the encoded image tensor.
      image_format: The image format for the image in `image_buffer`. If image
        format is `raw`, all images are expected to be in this format, otherwise
        this op can decode a mix of `jpg` and `png` formats.

    Returns:
      A tensor that represents the decoded image of self._shape, or
      (?, ?, self._channels) if self._shape is not specified.
    """
    def decode_image():
      """Decodes a png or jpg based on the headers."""
      return image_ops.decode_image(image_buffer, self._channels)

    def decode_raw():
      """Decodes a raw image."""
      return parsing_ops.decode_raw(image_buffer, out_type=self._dtype)

    pred_fn_pairs = {
        math_ops.logical_or(
            math_ops.equal(image_format, 'raw'),
            math_ops.equal(image_format, 'RAW')): decode_raw,
    }
    image = control_flow_ops.case(
        pred_fn_pairs, default=decode_image, exclusive=True)

    image.set_shape([None, None, self._channels])
    if self._shape is not None:
      image = array_ops.reshape(image, self._shape)

    return image
Example #7
def _check_labels_and_scores(boolean_labels, scores, check_shape):
  """Check the rank of labels/scores, return tensor versions."""
  with ops.op_scope([boolean_labels, scores], '_check_labels_and_scores'):
    boolean_labels = ops.convert_to_tensor(boolean_labels,
                                           name='boolean_labels')
    scores = ops.convert_to_tensor(scores, name='scores')

    if boolean_labels.dtype != dtypes.bool:
      raise ValueError(
          'Argument boolean_labels should have dtype bool.  Found: %s' %
          boolean_labels.dtype)

    if check_shape:
      labels_rank_1 = logging_ops.Assert(
          math_ops.equal(1, array_ops.rank(boolean_labels)),
          ['Argument boolean_labels should have rank 1.  Found: ',
           boolean_labels.name, array_ops.shape(boolean_labels)])

      scores_rank_1 = logging_ops.Assert(
          math_ops.equal(1, array_ops.rank(scores)),
          ['Argument scores should have rank 1.  Found: ', scores.name,
           array_ops.shape(scores)])

      with ops.control_dependencies([labels_rank_1, scores_rank_1]):
        return boolean_labels, scores
    else:
      return boolean_labels, scores
  def _broadcast_uniform_partitioned_dimension(self, axis, lengths):
    """Broadcasts the partitioned dimension `axis` to match `lengths`."""
    axis_dim_size = self.dimension_size(axis)
    partitioned_sizes = list(self._partitioned_dim_sizes[:axis])

    if lengths.shape.ndims == 0:
      lengths = array_ops.where(
          math_ops.equal(axis_dim_size, 1), lengths, axis_dim_size)
      repeats = array_ops.where(math_ops.equal(axis_dim_size, 1), lengths, 1)
      splits = array_ops.stack([0, self.num_slices_in_dimension(axis)])
    else:
      splits = math_ops.range(
          array_ops.size(lengths, out_type=self.dim_size_dtype) + 1)
      repeats = lengths

    partitioned_sizes.append(lengths)

    for dim_size in self._partitioned_dim_sizes[axis + 1:]:
      if dim_size.shape.ndims == 0:
        partitioned_sizes.append(dim_size)
        splits *= dim_size
      else:
        partitioned_sizes.append(
            ragged_util.repeat_ranges(dim_size, splits, repeats))
        splits = array_ops.gather(
            ragged_util.lengths_to_splits(dim_size), splits)
    inner_sizes = self._inner_dim_sizes
    return RaggedTensorDynamicShape(partitioned_sizes, inner_sizes)
def _compute_zeroone_score(labels, predictions):
  zeroone_score = math_ops.to_float(
      math_ops.equal(
          math_ops.reduce_sum(
              math_ops.to_int32(math_ops.equal(labels, predictions))),
          array_ops.shape(labels)[0]))
  return zeroone_score
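A hand-checked walkthrough of the score (assumed TF2 eager equivalents of the
internal ops above):

import tensorflow as tf

labels = tf.constant([1, 2, 3])
predictions = tf.constant([1, 2, 0])
matches = tf.reduce_sum(tf.cast(tf.equal(labels, predictions), tf.int32))  # 2
score = tf.cast(tf.equal(matches, tf.shape(labels)[0]), tf.float32)
print(float(score))  # 0.0 -- the score is 1.0 only if every prediction matches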
  def _decode(self, image_buffer, image_format):
    """Decodes the image buffer.

    Args:
      image_buffer: The tensor representing the encoded image tensor.
      image_format: The image format for the image in `image_buffer`.

    Returns:
      A decoded image.
    """
    def decode_png():
      return image_ops.decode_png(image_buffer, self._channels)
    def decode_raw():
      return parsing_ops.decode_raw(image_buffer, dtypes.uint8)
    def decode_jpg():
      return image_ops.decode_jpeg(image_buffer, self._channels)

    image = control_flow_ops.case({
        math_ops.logical_or(math_ops.equal(image_format, 'png'),
                            math_ops.equal(image_format, 'PNG')): decode_png,
        math_ops.logical_or(math_ops.equal(image_format, 'raw'),
                            math_ops.equal(image_format, 'RAW')): decode_raw,
    }, default=decode_jpg, exclusive=True)

    image.set_shape([None, None, self._channels])
    if self._shape is not None:
      image = array_ops.reshape(image, self._shape)

    return image
Example #11
def get_seed(seed):
  """Returns the local seeds an operation should use given an op-specific seed.

  See `tf.compat.v1.get_seed` for more details. This wrapper adds support for
  the case where `seed` may be a tensor.

  Args:
    seed: An integer or a `tf.int64` scalar tensor.

  Returns:
    A tuple of two `tf.int64` scalar tensors that should be used for the local
    seed of the calling dataset.
  """
  seed, seed2 = random_seed.get_seed(seed)
  if seed is None:
    seed = constant_op.constant(0, dtype=dtypes.int64, name="seed")
  else:
    seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed")
  if seed2 is None:
    seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
  else:
    with ops.name_scope("seed2") as scope:
      seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64)
      seed2 = array_ops.where(
          math_ops.logical_and(
              math_ops.equal(seed, 0), math_ops.equal(seed2, 0)),
          constant_op.constant(2**31 - 1, dtype=dtypes.int64),
          seed2,
          name=scope)
  return seed, seed2
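A sketch of the (seed, seed2) contract (hedged: behavior as documented for
tf.compat.v1.get_seed; the exact values depend on whether a graph-level seed
is set):

import tensorflow as tf

graph_seed, op_seed = tf.compat.v1.get_seed(42)
# The wrapper above converts a None component to a constant 0 tensor and, when
# both components are 0, substitutes 2**31 - 1 for seed2 so the kernel does
# not treat the all-zero pair as "pick a nondeterministic seed".
print(graph_seed, op_seed)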
Example #12
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: `Tensor` whose shape matches `numerator` and whose values are
      assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  if isinstance(denominator, float):
    if math_ops.equal(denominator, 0.0):
      return ops.convert_to_tensor(0.0, dtype=numerator.dtype)
    return math_ops.div(numerator, denominator)
  if context.in_eager_mode() and denominator._rank() == 0:  # pylint: disable=protected-access
    if math_ops.equal(denominator, 0.0):
      return ops.convert_to_tensor(0.0, dtype=numerator.dtype)
    return math_ops.div(numerator, denominator)
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(numerator, array_ops.where(
          math_ops.equal(denominator, 0),
          array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator),
      name=name)
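A minimal public-API sketch of the same contract (assumption:
tf.math.divide_no_nan captures the intended zero-denominator behavior):

import tensorflow as tf

num = tf.constant([1.0, 2.0, 3.0])
den = tf.constant([2.0, 0.0, 4.0])
print(tf.math.divide_no_nan(num, den).numpy())  # [0.5  0.   0.75]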
Example #13
def same_dynamic_shape(a, b):
  """Returns whether a and b have the same dynamic shape.

  Args:
    a: `Tensor`
    b: `Tensor`

  Returns:
    `Boolean` `Tensor` representing if both tensors have the same shape.
  """
  a = ops.convert_to_tensor(a, name="a")
  b = ops.convert_to_tensor(b, name="b")

  # One of the shapes isn't fully defined, so we need to use the dynamic
  # shape.
  return control_flow_ops.cond(
      math_ops.equal(array_ops.rank(a), array_ops.rank(b)),
      # Here we can't just do math_ops.equal(a.shape, b.shape), since
      # static shape inference may break the equality comparison between
      # shape(a) and shape(b) in math_ops.equal.
      lambda: math_ops.reduce_all(math_ops.equal(
          array_ops.concat_v2((
              array_ops.shape(a),
              array_ops.shape(b)), 0),
          array_ops.concat_v2((
              array_ops.shape(b),
              array_ops.shape(a)), 0))),
      lambda: constant_op.constant(False))
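A simplified sketch of the same comparison with public ops (hypothetical
rewrite; the concat-both-orders trick above exists to sidestep broadcasting
when the ranks differ):

import tensorflow as tf

def same_shape(x, y):
  # Compare ranks first; tf.equal on shape vectors of different lengths
  # would not broadcast.
  if x.shape.rank != y.shape.rank:
    return tf.constant(False)
  return tf.reduce_all(tf.equal(tf.shape(x), tf.shape(y)))

print(bool(same_shape(tf.zeros([2, 3]), tf.zeros([2, 3]))))  # True
print(bool(same_shape(tf.zeros([2, 3]), tf.zeros([3, 2]))))  # False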
def _compute_zeroone_score(labels, predictions):
  zeroone_score = math_ops.cast(
      math_ops.equal(
          math_ops.reduce_sum(
              math_ops.cast(math_ops.equal(labels, predictions), dtypes.int32)),
          array_ops.shape(labels)[0]),
      dtypes.float32)
  return zeroone_score
Example #15
def _dynamic_rank_in(actual_rank, given_ranks):
  if len(given_ranks) < 1:
    return ops.convert_to_tensor(False)
  result = math_ops.equal(given_ranks[0], actual_rank)
  for given_rank in given_ranks[1:]:
    result = math_ops.logical_or(
        result, math_ops.equal(given_rank, actual_rank))
  return result
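Usage sketch (assumed eager semantics): OR-reduce equality of the runtime
rank against each candidate rank.

import tensorflow as tf

x = tf.zeros([4, 5])
result = tf.reduce_any([tf.equal(r, tf.rank(x)) for r in (1, 2, 3)])
print(bool(result))  # True -- rank 2 is in the candidate set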
 def testCase_dict(self):
   x = constant_op.constant(2)
   conditions = {
       math_ops.equal(x, 1): lambda: constant_op.constant(2),
       math_ops.equal(x, 2): lambda: constant_op.constant(4)
   }
   output = control_flow_ops.case(conditions, exclusive=True)
   self.assertEqual(4, self.evaluate(output))
def remove_squeezable_dimensions(
    labels, predictions, expected_rank_diff=0, name=None):
  """Squeeze last dim if ranks differ from expected by exactly 1.

  In the common case where we expect shapes to match, `expected_rank_diff`
  defaults to 0, and we squeeze the last dimension of the larger rank if they
  differ by 1.

  But, for example, if `labels` contains class IDs and `predictions` contains 1
  probability per class, we expect `predictions` to have 1 more dimension than
  `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
  `labels` if `rank(predictions) - rank(labels) == 0`, and
  `predictions` if `rank(predictions) - rank(labels) == 2`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
    name: Name of the op.

  Returns:
    Tuple of `labels` and `predictions`, possibly with last dim squeezed.
  """
  with ops.name_scope(name, 'remove_squeezable_dimensions',
                      [labels, predictions]):
    predictions = ops.convert_to_tensor(predictions)
    labels = ops.convert_to_tensor(labels)
    predictions_shape = predictions.get_shape()
    predictions_rank = predictions_shape.ndims
    labels_shape = labels.get_shape()
    labels_rank = labels_shape.ndims
    if (labels_rank is not None) and (predictions_rank is not None):
      # Use static rank.
      rank_diff = predictions_rank - labels_rank
      if rank_diff == expected_rank_diff + 1:
        predictions = array_ops.squeeze(predictions, [-1])
      elif rank_diff == expected_rank_diff - 1:
        labels = array_ops.squeeze(labels, [-1])
      return labels, predictions

    # Use dynamic rank.
    rank_diff = array_ops.rank(predictions) - array_ops.rank(labels)
    if (predictions_rank is None) or (
        predictions_shape.dims[-1].is_compatible_with(1)):
      predictions = control_flow_ops.cond(
          math_ops.equal(expected_rank_diff + 1, rank_diff),
          lambda: array_ops.squeeze(predictions, [-1]),
          lambda: predictions)
    if (labels_rank is None) or (
        labels_shape.dims[-1].is_compatible_with(1)):
      labels = control_flow_ops.cond(
          math_ops.equal(expected_rank_diff - 1, rank_diff),
          lambda: array_ops.squeeze(labels, [-1]),
          lambda: labels)
    return labels, predictions
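A shape-only walkthrough (hand-worked, not from the source): with labels of
shape [2, 1], predictions of shape [2] and expected_rank_diff=0, the static
branch squeezes the trailing 1 from labels.

import tensorflow as tf

labels = tf.constant([[1], [0]])       # shape [2, 1]
predictions = tf.constant([0.7, 0.2])  # shape [2]
if labels.shape.rank - predictions.shape.rank == 1:
  labels = tf.squeeze(labels, [-1])
print(labels.shape, predictions.shape)  # (2,) (2,)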
Example #18
 def _loss_fn(labels, logits):
   check_labels = control_flow_ops.Assert(
       math_ops.reduce_all(math_ops.equal(labels, labels_input)),
       data=[labels])
   check_logits = control_flow_ops.Assert(
       math_ops.reduce_all(math_ops.equal(logits, logits_input)),
       data=[logits])
   with ops.control_dependencies([check_labels, check_logits]):
     return constant_op.constant(loss)
 def testCase_withDefault(self):
   x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
   conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)),
                 (math_ops.equal(x, 2), lambda: constant_op.constant(4))]
   default = lambda: constant_op.constant(6)
   output = control_flow_ops.case(conditions, default, exclusive=True)
   with self.test_session() as sess:
     self.assertEqual(sess.run(output, feed_dict={x: 1}), 2)
     self.assertEqual(sess.run(output, feed_dict={x: 2}), 4)
     self.assertEqual(sess.run(output, feed_dict={x: 3}), 6)
  def _decode(self, image_buffer, image_format):
    """Decodes the image buffer.

    Args:
      image_buffer: The tensor representing the encoded image tensor.
      image_format: The image format for the image in `image_buffer`. If image
        format is `raw`, all images are expected to be in this format, otherwise
        this op can decode a mix of `jpg` and `png` formats.

    Returns:
      A tensor that represents the decoded image of self._shape, or
      (?, ?, self._channels) if self._shape is not specified.
    """

    def decode_image():
      """Decodes a image based on the headers."""
      return math_ops.cast(
          image_ops.decode_image(image_buffer, channels=self._channels),
          self._dtype)

    def decode_jpeg():
      """Decodes a jpeg image with specified '_dct_method'."""
      return math_ops.cast(
          image_ops.decode_jpeg(
              image_buffer,
              channels=self._channels,
              dct_method=self._dct_method), self._dtype)

    def check_jpeg():
      """Checks if an image is jpeg."""
      # For jpeg, we directly use image_ops.decode_jpeg rather than decode_image
      # in order to feed the jpeg-specific parameter 'dct_method'.
      return control_flow_ops.cond(
          image_ops.is_jpeg(image_buffer),
          decode_jpeg,
          decode_image,
          name='cond_jpeg')

    def decode_raw():
      """Decodes a raw image."""
      return parsing_ops.decode_raw(image_buffer, out_type=self._dtype)

    pred_fn_pairs = {
        math_ops.logical_or(
            math_ops.equal(image_format, 'raw'),
            math_ops.equal(image_format, 'RAW')): decode_raw,
    }
    image = control_flow_ops.case(
        pred_fn_pairs, default=check_jpeg, exclusive=True)

    image.set_shape([None, None, self._channels])
    if self._shape is not None:
      image = array_ops.reshape(image, self._shape)

    return image
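The header sniff that check_jpeg relies on can be reproduced with the public
API (assumption: tf.io.is_jpeg mirrors image_ops.is_jpeg):

import tensorflow as tf

jpeg_bytes = tf.io.encode_jpeg(tf.zeros([4, 4, 3], tf.uint8))
print(bool(tf.io.is_jpeg(jpeg_bytes)))  # True -- matches the JPEG magic bytes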
 def testCase_withoutDefault(self):
   x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
   conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)),
                 (math_ops.equal(x, 2), lambda: constant_op.constant(4)),
                 (math_ops.equal(x, 3), lambda: constant_op.constant(6))]
   output = control_flow_ops.case(conditions, exclusive=True)
   with self.test_session() as sess:
     self.assertEqual(sess.run(output, feed_dict={x: 1}), 2)
     self.assertEqual(sess.run(output, feed_dict={x: 2}), 4)
     self.assertEqual(sess.run(output, feed_dict={x: 3}), 6)
     with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"):
       sess.run(output, feed_dict={x: 4})
Example #22
def _maybe_convert_labels(y_true):
  """Converts binary labels into -1/1."""
  are_zeros = math_ops.equal(y_true, 0)
  are_ones = math_ops.equal(y_true, 1)
  is_binary = math_ops.reduce_all(math_ops.logical_or(are_zeros, are_ones))

  def _convert_binary_labels():
    # Convert the binary labels to -1 or 1.
    return 2. * y_true - 1.

  updated_y_true = smart_cond.smart_cond(is_binary,
                                         _convert_binary_labels, lambda: y_true)
  return updated_y_true
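Worked example (hand-checked): binary {0, 1} labels map to {-1, 1} via
2 * y - 1; non-binary labels pass through unchanged.

import tensorflow as tf

y_true = tf.constant([0.0, 1.0, 1.0, 0.0])
print((2.0 * y_true - 1.0).numpy())  # [-1.  1.  1. -1.]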
 def testCase_multiple_matches_exclusive(self):
   x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
   conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)),
                 (math_ops.equal(x, 2), lambda: constant_op.constant(4)),
                 (math_ops.equal(x, 2), lambda: constant_op.constant(6))]
   default = lambda: constant_op.constant(8)
   output = control_flow_ops.case(conditions, default, exclusive=True)
   with self.test_session() as sess:
     self.assertEqual(sess.run(output, feed_dict={x: 1}), 2)
     self.assertEqual(sess.run(output, feed_dict={x: 3}), 8)
     with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                  "More than one condition evaluated as True"):
       sess.run(output, feed_dict={x: 2})
Example #24
  def testEqual(self):
    # Scalar inputs.
    tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(1))
    self.assertEqual(tensor_util.constant_value(tf_val), True)

    tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(0))
    self.assertEqual(tensor_util.constant_value(tf_val), False)

    # Shaped inputs with broadcast semantics.
    tf_val = math_ops.equal(constant_op.constant([[0, 1]]),
                            constant_op.constant([[0], [1]]))
    c_val = tensor_util.constant_value(tf_val)
    self.assertAllEqual(c_val, [[True, False], [False, True]])
Example #25
    def control_map_fn(x, y):

      def multiply():
        return x * 2

      def divide():
        return x // 2

      pred_fn_pairs = {
          math_ops.logical_or(math_ops.equal(y, 2), math_ops.equal(y, 3)):
              divide,
      }

      return control_flow_ops.case(
          pred_fn_pairs, default=multiply, exclusive=True)
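An eager sketch of the same branching (hypothetical, using tf.cond in place
of the dictionary-based control_flow_ops.case):

import tensorflow as tf

def control_map_fn(x, y):
  # Divide when y is 2 or 3, otherwise multiply.
  return tf.cond(tf.logical_or(tf.equal(y, 2), tf.equal(y, 3)),
                 lambda: x // 2, lambda: x * 2)

print(int(control_map_fn(tf.constant(10), tf.constant(2))))  # 5
print(int(control_map_fn(tf.constant(10), tf.constant(1))))  # 20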
Example #26
 def _process_labels(self, labels):
   if isinstance(labels, sparse_tensor.SparseTensor):
     return math_ops.to_int64(
         sparse_ops.sparse_to_indicator(labels, self._n_classes))
   msg = ('labels shape must be [batch_size, {}]. '
          'Given: ').format(self._n_classes)
   labels_shape = array_ops.shape(labels)
   check_rank_op = control_flow_ops.Assert(
       math_ops.equal(array_ops.rank(labels), 2),
       data=[msg, labels_shape])
   check_label_dim = control_flow_ops.Assert(
       math_ops.equal(labels_shape[-1], self._n_classes),
       data=[msg, labels_shape])
   with ops.control_dependencies([check_rank_op, check_label_dim]):
     return array_ops.identity(labels)
  def __call__(self, step):
    with ops.name_scope(
        self.name, "PolynomialDecay",
        [self.initial_learning_rate, step, self.decay_steps,
         self.end_learning_rate, self.power]
    ) as name:
      initial_learning_rate = ops.convert_to_tensor(
          self.initial_learning_rate, name="initial_learning_rate")
      dtype = initial_learning_rate.dtype
      end_learning_rate = math_ops.cast(self.end_learning_rate, dtype)
      power = math_ops.cast(self.power, dtype)

      global_step_recomp = math_ops.cast(step, dtype)
      decay_steps_recomp = math_ops.cast(self.decay_steps, dtype)
      if self.cycle:
        # Find the first multiple of decay_steps that is bigger than
        # global_step. If global_step is zero, set the multiplier to 1.
        multiplier = control_flow_ops.cond(
            math_ops.equal(global_step_recomp, 0), lambda: 1.0,
            lambda: math_ops.ceil(global_step_recomp / self.decay_steps))
        decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
      else:
        # Make sure that the global_step used is not bigger than decay_steps.
        global_step_recomp = math_ops.minimum(global_step_recomp,
                                              self.decay_steps)

      p = math_ops.div(global_step_recomp, decay_steps_recomp)
      return math_ops.add(
          math_ops.multiply(initial_learning_rate - end_learning_rate,
                            math_ops.pow(1 - p, power)),
          end_learning_rate,
          name=name)
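A hand-worked value of the schedule (no cycling): with
initial_learning_rate=0.1, end_learning_rate=0.01, decay_steps=100 and
power=1.0, step 50 yields (0.1 - 0.01) * (1 - 50/100)**1 + 0.01 = 0.055. The
public Keras schedule (assumed equivalent to the snippet above) reproduces it:

import tensorflow as tf

schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.1, decay_steps=100, end_learning_rate=0.01)
print(float(schedule(50)))  # 0.055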
  def testNonSequenceNestedStructure(self):
    components = np.array([1, 2, 3], dtype=np.int64)

    dataset = dataset_ops.Dataset.from_tensors(components)
    self.assertEquals(dtypes.int64, dataset.output_types)
    self.assertEquals([3], dataset.output_shapes)

    dataset = dataset.filter(
        lambda x: math_ops.reduce_all(math_ops.equal(x, components)))
    self.assertEquals(dtypes.int64, dataset.output_types)
    self.assertEquals([3], dataset.output_shapes)

    dataset = dataset.map(lambda x: array_ops.stack([x, x]))
    self.assertEquals(dtypes.int64, dataset.output_types)
    self.assertEquals([2, 3], dataset.output_shapes)

    dataset = dataset.flat_map(
        lambda x: dataset_ops.Dataset.from_tensor_slices(x))
    self.assertEquals(dtypes.int64, dataset.output_types)
    self.assertEquals([3], dataset.output_shapes)

    iterator = dataset.make_one_shot_iterator()
    get_next = iterator.get_next()
    self.assertEquals(dtypes.int64, get_next.dtype)
    self.assertEquals([3], get_next.shape)
Example #29
def _num_present(losses, weights, per_batch=False):
  """Computes the number of elements in the loss function induced by `weights`.

  A given weights tensor induces different numbers of usable elements in the
  `losses` tensor. The `weights` tensor is broadcast across `losses` for all
  possible dimensions. For example, if `losses` is a tensor of dimension
  `[4, 5, 6, 3]` and `weights` is a tensor of shape `[4, 5]`, then `weights` is,
  in effect, tiled to match the shape of `losses`. Following this effective
  tile, the total number of present elements is the number of non-zero weights.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    weights: `Tensor` of shape `[]`, `[batch_size]` or
      `[batch_size, d1, ... dK]`, where K < N.
    per_batch: Whether to return the number of elements per batch or as a sum
      total.

  Returns:
    The number of present (non-zero) elements in the losses tensor. If
      `per_batch` is `True`, the value is returned as a tensor of size
      `[batch_size]`. Otherwise, a single scalar tensor is returned.
  """
  with ops.name_scope(None, "num_present", (losses, weights)) as scope:
    weights = math_ops.to_float(weights)
    present = array_ops.where(
        math_ops.equal(weights, 0.0),
        array_ops.zeros_like(weights),
        array_ops.ones_like(weights))
    present = weights_broadcast_ops.broadcast_weights(present, losses)
    if per_batch:
      return math_ops.reduce_sum(
          present, axis=math_ops.range(1, array_ops.rank(present)),
          keep_dims=True, name=scope)
    return math_ops.reduce_sum(present, name=scope)
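Hand-worked count (assumed public-op equivalents): losses of shape [2, 3]
with weights [1.0, 0.0] -- each nonzero weight broadcasts across the last
axis, so 3 elements are present.

import tensorflow as tf

losses = tf.ones([2, 3])
weights = tf.constant([1.0, 0.0])
present = tf.cast(tf.not_equal(weights, 0.0), tf.float32)[:, tf.newaxis]
present = tf.broadcast_to(present, losses.shape)
print(float(tf.reduce_sum(present)))  # 3.0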
  def _make_columnar(self, x):
    """Ensures non-scalar input has at least one column.

    Example:
      If `x = [1, 2, 3]` then the output is `[[1, 2, 3]]`.

      If `x = [[1, 2, 3], [4, 5, 6]]` then the output is unchanged.

      If `x = 1` then the output is unchanged.

    Args:
      x: `Tensor`.

    Returns:
      columnar_x: `Tensor` with at least two dimensions.
    """
    if x.get_shape().ndims is not None:
      if x.get_shape().ndims == 1:
        x = x[array_ops.newaxis, :]
      return x
    shape = array_ops.shape(x)
    maybe_expanded_shape = array_ops.concat([
        shape[:-1],
        distribution_util.pick_vector(
            math_ops.equal(array_ops.rank(x), 1),
            [1], np.array([], dtype=np.int32)),
        shape[-1:],
    ], 0)
    return array_ops.reshape(x, maybe_expanded_shape)
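Shape sketch (hand-worked): a rank-1 input gains a leading axis, while higher
ranks pass through unchanged.

import tensorflow as tf

x = tf.constant([1, 2, 3])
columnar = x[tf.newaxis, :] if x.shape.rank == 1 else x
print(columnar.shape)  # (1, 3)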
Example #31
 def defaults_two():
   return control_flow_ops.cond(
       math_ops.equal(math_ops.mod(x, 2), 0),
       multiply,
       divide,
       name="cond_mult")
def move_dimension(x, source_idx, dest_idx):
    """Move a single tensor dimension within its shape.

  This is a special case of `tf.transpose()`, which applies
  arbitrary permutations to tensor dimensions.

  Args:
    x: Tensor of rank `ndims`.
    source_idx: Integer index into `x.shape` (negative indexing is
      supported).
    dest_idx: Integer index into `x.shape` (negative indexing is
      supported).

  Returns:
    x_perm: Tensor of rank `ndims`, in which the dimension at original
     index `source_idx` has been moved to new index `dest_idx`, with
     all other dimensions retained in their original order.

  Example:

  ```python
  x = tf.placeholder(tf.float32, shape=[200, 30, 4, 1, 6])
  x_perm = move_dimension(x, 1, 1) # no-op
  x_perm = move_dimension(x, 0, 3) # result shape [30, 4, 1, 200, 6]
  x_perm = move_dimension(x, 0, -2) # equivalent to previous
  x_perm = move_dimension(x, 4, 2) # result shape [200, 30, 6, 4, 1]
  ```
  """
    ndims = util.prefer_static_rank(x)
    if isinstance(source_idx, int):
        dtype = dtypes.int32
    else:
        dtype = dtypes.as_dtype(source_idx.dtype)

    # Handle negative indexing. Since ndims might be dynamic, this makes
    # source_idx and dest_idx also possibly dynamic.
    if source_idx < 0:
        source_idx = ndims + source_idx
    if dest_idx < 0:
        dest_idx = ndims + dest_idx

    # Construct the appropriate permutation of dimensions, depending
    # whether the source is before or after the destination.
    def move_left_permutation():
        return util.prefer_static_value(
            array_ops.concat([
                math_ops.range(0, dest_idx, dtype=dtype), [source_idx],
                math_ops.range(dest_idx, source_idx, dtype=dtype),
                math_ops.range(source_idx + 1, ndims, dtype=dtype)
            ],
                             axis=0))

    def move_right_permutation():
        return util.prefer_static_value(
            array_ops.concat([
                math_ops.range(0, source_idx, dtype=dtype),
                math_ops.range(source_idx + 1, dest_idx + 1, dtype=dtype),
                [source_idx],
                math_ops.range(dest_idx + 1, ndims, dtype=dtype)
            ],
                             axis=0))

    def x_permuted():
        return array_ops.transpose(x,
                                   perm=smart_cond.smart_cond(
                                       source_idx < dest_idx,
                                       move_right_permutation,
                                       move_left_permutation))

    # One final conditional to handle the special case where source
    # and destination indices are equal.
    return smart_cond.smart_cond(math_ops.equal(source_idx, dest_idx),
                                 lambda: x, x_permuted)
Example #33
def get_weights_and_check_match_logits(features,
                                       weight_column,
                                       logits,
                                       allow_per_logit_weights=False):
    """Fetches weights from features and checks that the shape matches logits.

  Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape
  can be either:
  * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`.
  * [D0, D1, ... DN, 1]
  * [D0, D1, ... DN]: In this case, weights is reshaped into
    [D0, D1, ... DN, 1] to work with weight broadcasting rules.

  Args:
    features: The features dict that contains weights.
    weight_column: The weight column. If not given, this method returns 1.
    logits: logits Tensor.
    allow_per_logit_weights: Boolean. Whether we allow weights along the logits
      dimension, namely shape `[D0, D1, ... DN, logits_dimension]`.

  Returns:
    Validated and reshaped weights Tensor.

  Raises:
    ValueError: If the weights `Tensor` cannot be cast into float.
  """
    if allow_per_logit_weights:
        err_msg = (
            'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or '
            '[D0, D1, ... DN, logits_dimension]')
    else:
        err_msg = (
            'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]')
    with ops.name_scope('weights',
                        values=tuple(six.itervalues(features)) +
                        (logits, )) as scope:
        # Fetch the weights.
        if weight_column is None:
            return 1.
        # TODO(b/117839674): update feature_column
        if isinstance(weight_column, six.string_types):
            weight_column = feature_column_lib.numeric_column(
                key=weight_column, shape=(1, ))
        if not isinstance(weight_column, feature_column_lib._NumericColumn):  # pylint: disable=protected-access
            raise TypeError(
                'Weight column must be either a string or _NumericColumn.'
                ' Given type: {}.'.format(type(weight_column)))
        weights = weight_column._get_dense_tensor(  # pylint: disable=protected-access
            feature_column_lib._LazyBuilder(features))  # pylint: disable=protected-access
        if not (weights.dtype.is_floating or weights.dtype.is_integer):
            raise ValueError('Weight column should be castable to float. '
                             'Given dtype: {}'.format(weights.dtype))
        weights = math_ops.to_float(weights, name='weights')
        # Validate the weights shape.
        # Eager mode.
        if context.executing_eagerly():
            weights_shape = weights._shape_tuple()  # pylint: disable=protected-access
            logits_shape = logits._shape_tuple()  # pylint: disable=protected-access
            weights_rank = weights._rank()  # pylint: disable=protected-access
            logits_rank = logits._rank()  # pylint: disable=protected-access
            if (weights_rank is not None and logits_rank is not None
                    and weights_rank == logits_rank - 1):
                if logits_shape[:-1] != weights_shape:
                    raise ValueError(
                        '{}, logits_shape: {}. weights_shape: {}.'.format(
                            err_msg, logits_shape, weights_shape))
                return array_ops.expand_dims(weights, -1, name=scope)
            supported_weights_shape = logits_shape[:-1] + (1, )
            if allow_per_logit_weights:
                if (logits_shape != weights_shape
                        and supported_weights_shape != weights_shape):
                    raise ValueError(
                        '{}, logits_shape: {}. weights_shape: {}.'.format(
                            err_msg, logits_shape, weights_shape))
            else:
                if supported_weights_shape != weights_shape:
                    raise ValueError(
                        '{}, logits_shape: {}. weights_shape: {}.'.format(
                            err_msg, logits_shape, weights_shape))
            return weights

        # Graph mode.
        weights_shape = array_ops.shape(weights, name='weights_shape')
        logits_shape = array_ops.shape(logits, name='logits_shape')
        if (weights.shape.ndims is not None and logits.shape.ndims is not None
                and weights.shape.ndims == logits.shape.ndims - 1):
            assert_dimension = check_ops.assert_equal(logits_shape[:-1],
                                                      weights_shape,
                                                      message=err_msg,
                                                      data=[
                                                          'logits_shape: ',
                                                          logits_shape,
                                                          'weights_shape: ',
                                                          weights_shape
                                                      ])
            with ops.control_dependencies([assert_dimension]):
                return array_ops.expand_dims(weights, -1, name=scope)
        supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]],
                                                   axis=0)
        if allow_per_logit_weights:
            condition = math_ops.reduce_any([
                math_ops.reduce_all(math_ops.equal(logits_shape,
                                                   weights_shape)),
                math_ops.reduce_all(
                    math_ops.equal(supported_weights_shape, weights_shape))
            ])
            assert_dimension = control_flow_ops.Assert(condition=condition,
                                                       data=[
                                                           err_msg,
                                                           'logits_shape: ',
                                                           logits_shape,
                                                           'weights_shape: ',
                                                           weights_shape
                                                       ])
        else:
            assert_dimension = check_ops.assert_equal(supported_weights_shape,
                                                      weights_shape,
                                                      message=err_msg,
                                                      data=[
                                                          'logits_shape: ',
                                                          logits_shape,
                                                          'weights_shape: ',
                                                          weights_shape
                                                      ])
        with ops.control_dependencies([assert_dimension]):
            return array_ops.identity(weights, name=scope)
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight,
                      coverage_penalty_weight):
    """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int.  The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
    coverage_penalty_weight: Float weight to penalize the coverage of source
      sentence. Disabled with 0.0.

  Returns:
    A new beam state.
  """
    static_batch_size = tensor_util.constant_value(batch_size)

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished
    not_finished = math_ops.logical_not(previously_finished)

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = nn_ops.log_softmax(logits)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = array_ops.expand_dims(beam_state.log_probs,
                                        2) + step_log_probs

    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
    lengths_to_add = array_ops.one_hot(indices=array_ops.fill(
        [batch_size, beam_width], end_token),
                                       depth=vocab_size,
                                       on_value=np.int64(0),
                                       off_value=np.int64(1),
                                       dtype=dtypes.int64)
    add_mask = math_ops.to_int64(not_finished)
    lengths_to_add *= array_ops.expand_dims(add_mask, 2)
    new_prediction_lengths = (lengths_to_add +
                              array_ops.expand_dims(prediction_lengths, 2))

    # Calculate the accumulated attention probabilities if coverage penalty is
    # enabled.
    accumulated_attention_probs = None
    attention_probs = get_attention_probs(next_cell_state,
                                          coverage_penalty_weight)
    if attention_probs is not None:
        attention_probs *= array_ops.expand_dims(
            math_ops.to_float(not_finished), 2)
        accumulated_attention_probs = (beam_state.accumulated_attention_probs +
                                       attention_probs)

    # Calculate the scores for each beam
    scores = _get_scores(
        log_probs=total_probs,
        sequence_lengths=new_prediction_lengths,
        length_penalty_weight=length_penalty_weight,
        coverage_penalty_weight=coverage_penalty_weight,
        finished=previously_finished,
        accumulated_attention_probs=accumulated_attention_probs)

    time = ops.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_flat = array_ops.reshape(scores, [batch_size, -1])

    # Pick the next beams according to the specified successors function
    next_beam_size = ops.convert_to_tensor(beam_width,
                                           dtype=dtypes.int32,
                                           name="beam_width")
    next_beam_scores, word_indices = nn_ops.top_k(scores_flat,
                                                  k=next_beam_size)

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen predictions
    next_beam_probs = _tensor_gather_helper(gather_indices=word_indices,
                                            gather_from=total_probs,
                                            batch_size=batch_size,
                                            range_size=beam_width * vocab_size,
                                            gather_shape=[-1],
                                            name="next_beam_probs")
    # Note: just doing the following
    #   math_ops.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = math_ops.mod(word_indices,
                                     vocab_size,
                                     name="next_beam_word_ids")
    next_word_ids = math_ops.to_int32(raw_next_word_ids)
    next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                      name="next_beam_parent_ids")

    # Append new ids to current predictions
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = math_ops.logical_or(previously_finished,
                                        math_ops.equal(next_word_ids,
                                                       end_token),
                                        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged.
    # 2. Beams that are now finished (EOS predicted) have their length
    #    increased by 1.
    # 3. Beams that are not yet finished have their length increased by 1.
    lengths_to_add = math_ops.to_int64(
        math_ops.logical_not(previously_finished))
    next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids,
                                                gather_from=beam_state.lengths,
                                                batch_size=batch_size,
                                                range_size=beam_width,
                                                gather_shape=[-1])
    next_prediction_len += lengths_to_add
    next_accumulated_attention_probs = ()
    if accumulated_attention_probs is not None:
        next_accumulated_attention_probs = _tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=accumulated_attention_probs,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1],
            name="next_accumulated_attention_probs")

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    # pylint: disable=g-long-lambda
    next_cell_state = nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]), next_cell_state)
    # pylint: enable=g-long-lambda

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished,
        accumulated_attention_probs=next_accumulated_attention_probs)

    output = BeamSearchDecoderOutput(scores=next_beam_scores,
                                     predicted_ids=next_word_ids,
                                     parent_ids=next_beam_ids)

    return output, next_state
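The flattened top-k index arithmetic above decomposes as
beam = index // vocab_size and word = index % vocab_size; a hand-worked
instance:

# With vocab_size=5, flattened index 13 means beam 2 chose word 3.
vocab_size = 5
word_index = 13
print(word_index // vocab_size, word_index % vocab_size)  # 2 3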
Example #35
def _train_deeplab_model(iterator, num_of_classes, ignore_label):
    """Trains the deeplab model.

  Args:
    iterator: An iterator of type tf.data.Iterator for images and labels.
    num_of_classes: Number of classes for the dataset.
    ignore_label: Ignore label for the dataset.

  Returns:
    train_tensor: A tensor to update the model variables.
    summary_op: An operation to log the summaries.
  """
    global_step = tf.train.get_or_create_global_step()
    summaries = []

    learning_rate = train_utils.get_model_learning_rate(
        FLAGS.learning_policy, FLAGS.base_learning_rate,
        FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
        FLAGS.training_number_of_steps, FLAGS.learning_power,
        FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
    summaries.append(tf.summary.scalar('learning_rate', learning_rate))

    optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)

    tower_grads = []
    tower_summaries = None
    for i in range(FLAGS.num_clones):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('clone_%d' % i) as scope:
                loss = _tower_loss(iterator=iterator,
                                   num_of_classes=num_of_classes,
                                   ignore_label=ignore_label,
                                   scope=scope,
                                   reuse_variable=(i != 0))
                grads = optimizer.compute_gradients(loss)
                tower_grads.append(grads)

                # Retain the summaries from the first tower.
                if not i:
                    tower_summaries = tf.summary.merge_all(scope=scope)

    with tf.device('/cpu:0'):
        grads_and_vars = _average_gradients(tower_grads)
        if tower_summaries is not None:
            summaries.append(tower_summaries)

        # Modify the gradients for biases and last layer variables.
        last_layers = model.get_extra_layer_scopes(
            FLAGS.last_layers_contain_logits_only)
        grad_mult = train_utils.get_model_gradient_multipliers(
            last_layers, FLAGS.last_layer_gradient_multiplier)
        if grad_mult:
            grads_and_vars = tf.contrib.training.multiply_gradients(
                grads_and_vars, grad_mult)

        # Create gradient update op.
        grad_updates = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

        # Gather update_ops. These contain, for example,
        # the updates for the batch_norm variables created by model_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)

        total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

        # Print total loss to the terminal.
        # This implementation is mirrored from tf.slim.summaries.
        should_log = math_ops.equal(math_ops.mod(global_step, FLAGS.log_steps),
                                    0)
        total_loss = tf.cond(
            should_log,
            lambda: tf.Print(total_loss, [total_loss], 'Total loss is :'),
            lambda: total_loss)

        summaries.append(tf.summary.scalar('total_loss', total_loss))

        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')
        summary_op = tf.summary.merge(summaries)

    return train_tensor, summary_op
Example #36
 def _maybe_expand_weights():
     expand_weights = lambda: array_ops.expand_dims(sample_weight, [-1])
     return control_flow_ops.cond(math_ops.equal(rank_diff, -1),
                                  expand_weights, lambda: sample_weight)
Example #37
def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None):
    """Squeeze or expand last dimension if needed.

  1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1
  (using `remove_squeezable_dimensions`).
  2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1
  from the new rank of `y_pred`.
  If `sample_weight` is scalar, it is kept scalar.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
    y_true: Optional label `Tensor` whose dimensions match `y_pred`.
    sample_weight: Optional weight scalar or `Tensor` whose dimensions match
      `y_pred`.

  Returns:
    Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
    the last dimension squeezed,
    `sample_weight` could be extended by one dimension.
    If `sample_weight` is None, (y_pred, y_true) is returned.
  """
    y_pred_shape = y_pred.shape
    y_pred_rank = y_pred_shape.ndims
    if y_true is not None:

        # If sparse matrix is provided as `y_true`, the last dimension in `y_pred`
        # may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)),
        # y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3))
        # In this case, we should not try to remove squeezable dimension.
        y_true_shape = y_true.shape
        y_true_rank = y_true_shape.ndims
        if (y_true_rank is not None) and (y_pred_rank is not None):
            # Use static rank for `y_true` and `y_pred`.
            if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1:
                y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred)
        else:
            # Use dynamic rank.
            rank_diff = array_ops.rank(y_pred) - array_ops.rank(y_true)
            squeeze_dims = lambda: remove_squeezable_dimensions(  # pylint: disable=g-long-lambda
                y_true, y_pred)
            is_last_dim_1 = math_ops.equal(1, array_ops.shape(y_pred)[-1])
            maybe_squeeze_dims = lambda: control_flow_ops.cond(  # pylint: disable=g-long-lambda
                is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred))
            y_true, y_pred = control_flow_ops.cond(
                math_ops.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims)

    if sample_weight is None:
        return y_pred, y_true

    weights_shape = sample_weight.shape
    weights_rank = weights_shape.ndims
    if weights_rank == 0:  # If weights is scalar, do nothing.
        return y_pred, y_true, sample_weight

    if (y_pred_rank is not None) and (weights_rank is not None):
        # Use static rank.
        if weights_rank - y_pred_rank == 1:
            sample_weight = array_ops.squeeze(sample_weight, [-1])
        elif y_pred_rank - weights_rank == 1:
            sample_weight = array_ops.expand_dims(sample_weight, [-1])
        return y_pred, y_true, sample_weight

    # Use dynamic rank.
    weights_rank_tensor = array_ops.rank(sample_weight)
    rank_diff = weights_rank_tensor - array_ops.rank(y_pred)
    maybe_squeeze_weights = lambda: array_ops.squeeze(sample_weight, [-1])

    def _maybe_expand_weights():
        expand_weights = lambda: array_ops.expand_dims(sample_weight, [-1])
        return control_flow_ops.cond(math_ops.equal(rank_diff, -1),
                                     expand_weights, lambda: sample_weight)

    def _maybe_adjust_weights():
        return control_flow_ops.cond(math_ops.equal(rank_diff,
                                                    1), maybe_squeeze_weights,
                                     _maybe_expand_weights)

    # squeeze or expand last dim of `sample_weight` if its rank differs by 1
    # from the new rank of `y_pred`.
    sample_weight = control_flow_ops.cond(
        math_ops.equal(weights_rank_tensor, 0), lambda: sample_weight,
        _maybe_adjust_weights)
    return y_pred, y_true, sample_weight
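Hand-worked weight adjustment: y_pred of rank 2 with a sample_weight of
rank 3 ending in 1 -- the weight loses its last axis so it can broadcast.

import tensorflow as tf

y_pred = tf.zeros([4, 3])
sample_weight = tf.ones([4, 3, 1])
if sample_weight.shape.rank - y_pred.shape.rank == 1:
  sample_weight = tf.squeeze(sample_weight, [-1])
print(sample_weight.shape)  # (4, 3)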
Example #38
def lifted_struct_loss(labels, embeddings, margin=1.0):
    """Computes the lifted structured loss.
      The loss encourages the positive distances (between a pair of embeddings
      with the same labels) to be smaller than any negative distances (between a
      pair of embeddings with different labels) in the mini-batch in a way
      that is differentiable with respect to the embedding vectors.
      See: https://arxiv.org/abs/1511.06452.
      Args:
        labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
          multiclass integer labels.
        embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
          be l2 normalized.
        margin: Float, margin term in the loss definition.
      Returns:
        lifted_loss: tf.float32 scalar.
    """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pairwise_distances = pairwise_distance(embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    diff = margin - pairwise_distances
    mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    # Safe maximum: Temporarily shift negative distances
    #   above zero before taking max.
    #     this is to take the max only among negatives.
    row_minimums = math_ops.reduce_min(diff, 1, keep_dims=True)
    row_negative_maximums = math_ops.reduce_max(
        math_ops.multiply(diff - row_minimums,
                          mask), 1, keep_dims=True) + row_minimums

    max_elements = math_ops.maximum(row_negative_maximums,
                                    array_ops.transpose(row_negative_maximums))
    diff_tiled = array_ops.tile(diff, [batch_size, 1])
    mask_tiled = array_ops.tile(mask, [batch_size, 1])
    max_elements_vect = array_ops.reshape(array_ops.transpose(max_elements),
                                          [-1, 1])

    loss_exp_left = array_ops.reshape(
        math_ops.reduce_sum(math_ops.multiply(
            math_ops.exp(diff_tiled - max_elements_vect), mask_tiled),
                            1,
                            keep_dims=True), [batch_size, batch_size])

    loss_mat = max_elements + math_ops.log(loss_exp_left +
                                           array_ops.transpose(loss_exp_left))
    # Add the positive distance.
    loss_mat += pairwise_distances

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
    num_positives = math_ops.reduce_sum(mask_positives) / 2.0

    lifted_loss = math_ops.truediv(0.25 * math_ops.reduce_sum(
        math_ops.square(
            math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                             0.0))),
                                   num_positives,
                                   name='liftedstruct_loss')
    return lifted_loss
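
The "safe maximum" above is worth isolating; a small NumPy sketch of the trick with illustrative values: shift each row by its minimum so every entry is non-negative, zero out the masked-off entries, take the max, and shift back, which yields the row-wise maximum restricted to the masked entries.

import numpy as np

diff = np.array([[0.3, -2.0, 1.5],
                 [-0.7, 0.2, -0.1]])
mask = np.array([[1.0, 1.0, 0.0],   # 1.0 marks negative pairs
                 [0.0, 1.0, 1.0]])

row_min = diff.min(axis=1, keepdims=True)
# Max over masked entries only; masked-off entries cannot win because the
# shifted values are all >= 0 and the mask zeroes them out.
masked_max = ((diff - row_min) * mask).max(axis=1, keepdims=True) + row_min
print(masked_max)  # [[0.3], [0.2]]
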
Example #39
    def broadcast_dimension(self, axis, lengths):
        """Returns a shape that is broadcast-compatible with self & lengths.

    * If dimension[axis] is uniform and lengths is a scalar, then check
      that lengths==1, dimension[axis]==1, or dimension[axis]==lengths,
      and tile dimension[axis] with lengths repeats where its size is 1.

    * If dimension[axis] is uniform and lengths is a vector, then check
      that dimension[axis]==1, and raggedly tile dimension[axis] with
      lengths repeats.  (Tiling can be skipped when we statically know
      that lengths == 1.)

    * If dimension[axis] is ragged and lengths is a scalar, then check
      that lengths==1.

    * If dimension[axis] is ragged and lengths is a vector, then check
      that self.dimension_size(axis) == lengths.

    Args:
      axis: `int`.  The dimension to broadcast.
      lengths: 0-D or 1-D integer `Tensor`.

    Returns:
      A `RaggedTensorDynamicShape`.
    """
        lengths = ragged_util.convert_to_int_tensor(lengths,
                                                    name='lengths',
                                                    dtype=dtypes.int64)
        # Check whether lengths is a scalar (for uniform dimensions) or
        # vector (for ragged dimensions).
        if lengths.shape.ndims is None:
            raise ValueError('lengths must have a known rank.')
        elif lengths.shape.ndims > 1:
            raise ValueError('lengths must be a scalar or vector')
        else:
            lengths_is_scalar = (lengths.shape.ndims == 0)

        # Verify that the shapes are compatible.
        if self.is_ragged(axis):
            if lengths_is_scalar:
                condition = math_ops.equal(lengths, 1)
            else:
                condition = math_ops.reduce_all(
                    math_ops.equal(lengths, self.dimension_size(axis)))
        else:
            axis_dim_size = self.dimension_size(axis)
            if lengths_is_scalar:
                condition = (math_ops.equal(lengths, 1)
                             | math_ops.equal(axis_dim_size, 1)
                             | math_ops.equal(axis_dim_size, lengths))
            else:
                condition = math_ops.equal(axis_dim_size, 1)
        broadcast_err = [
            'Unable to broadcast: dimension size mismatch in dimension', axis,
            'lengths=', lengths, 'dim_size=',
            self.dimension_size(axis)
        ]
        broadcast_check = control_flow_ops.Assert(condition,
                                                  data=broadcast_err,
                                                  summarize=10)

        with ops.control_dependencies([broadcast_check]):
            # Partitioned dimensions:
            if axis < self.num_partitioned_dimensions:
                if self.is_ragged(axis):
                    # Use an identity op to make sure the check actually gets run.
                    return RaggedTensorDynamicShape(
                        self._partitioned_dim_sizes,
                        array_ops.identity(self.inner_dim_sizes))
                else:
                    return self._broadcast_uniform_partitioned_dimension(
                        axis, lengths)

            # Inner dimensions:
            else:
                if lengths_is_scalar:
                    return self._broadcast_inner_dimension_to_uniform(
                        axis, lengths)
                else:
                    if axis == 0:
                        raise ValueError(
                            'Unable to broadcast: '
                            'outermost dimension must be uniform.')
                    return self._broadcast_inner_dimension_to_ragged(
                        axis, lengths)
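
A plain-Python restatement of the uniform-dimension check above (illustrative helper, not part of the class): a uniform dimension of size `dim` is broadcast-compatible with a scalar `lengths` iff lengths == 1, dim == 1, or dim == lengths.

def uniform_dim_compatible(dim, lengths):
    # Mirrors the `condition` built for the uniform/scalar case above.
    return lengths == 1 or dim == 1 or dim == lengths

assert uniform_dim_compatible(5, 1)
assert uniform_dim_compatible(1, 7)
assert uniform_dim_compatible(4, 4)
assert not uniform_dim_compatible(4, 5)
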
Example #40
def _broadcast_to_ragged_shape(rt_input, dst_shape,
                               broadcast_inner_dimensions):
    """Broadcasts rt_input to the ragged shape `dst_shape`."""
    # dst_shape's rank and ragged_rank must be greater than or equal to rt_input's
    if rt_input.shape.ndims is None or dst_shape.rank is None:
        raise ValueError('Unable to broadcast: unknown rank')
    if rt_input.shape.ndims > dst_shape.rank:
        raise ValueError('Incompatible with shape: rank mismatch')
    if (isinstance(rt_input, ragged_tensor.RaggedTensor)
            and rt_input.ragged_rank >= dst_shape.num_partitioned_dimensions):
        raise ValueError('Incompatible with shape: ragged rank mismatch')

    src_shape = RaggedTensorDynamicShape.from_tensor(rt_input)
    src_shape = src_shape.broadcast_to_rank(dst_shape.rank)

    # Add dimensions to rt_input so its rank and ragged_rank matches dst_shape.
    if dst_shape.rank > rt_input.shape.ndims:
        if rt_input.shape.ndims < dst_shape.num_inner_dimensions + 1:
            rt_input = array_ops.reshape(
                rt_input,
                array_ops.concat([[-1], dst_shape.inner_dim_sizes], axis=0))
        for _ in range(dst_shape.rank - rt_input.shape.ndims):
            rt_input = ragged_factory_ops.from_row_lengths(
                rt_input, [ragged_array_ops.nrows(rt_input)])

    # Add ragged dimensions to match dst_shape.
    if ragged_tensor.is_ragged(rt_input):
        inner_rank_diff = (rt_input.inner_values.shape.ndims - 1 -
                           dst_shape.num_inner_dimensions)
        if inner_rank_diff > 0:
            rt_input = rt_input.with_inner_values(
                ragged_conversion_ops.from_tensor(rt_input.inner_values,
                                                  ragged_rank=inner_rank_diff))
    else:
        rt_input = ragged_conversion_ops.from_tensor(
            rt_input, ragged_rank=dst_shape.num_partitioned_dimensions - 1)

    # Do broadcasting for any dimensions that will remain uniform.  We can do
    # these all at once, since they're independent of one another.
    multiples = [1] * dst_shape.rank
    for axis in range(dst_shape.num_partitioned_dimensions):
        if not src_shape.is_ragged(axis) and not dst_shape.is_ragged(axis):
            src_size = src_shape.dimension_size(axis)
            dst_size = dst_shape.dimension_size(axis)
            if ((tensor_util.constant_value(src_size) in (1, None))
                    and (tensor_util.constant_value(dst_size) != 1)):
                multiples[axis] = array_ops.where(math_ops.equal(src_size, 1),
                                                  dst_size, 1)
    if not all(isinstance(v, int) and v == 1 for v in multiples):
        multiples = array_ops.stack(multiples, axis=0)
        rt_input = ragged_array_ops.tile(rt_input, multiples)

    if broadcast_inner_dimensions:
        rt_input = rt_input.with_inner_values(
            array_ops.reshape(
                rt_input.inner_values,
                array_ops.concat([[-1], dst_shape.inner_dim_sizes], axis=0)))

    # Do broadcasting for dimensions that become ragged.  We must do these from
    # outermost to innermost.
    for axis in range(dst_shape.num_partitioned_dimensions):
        if not src_shape.is_ragged(axis) and dst_shape.is_ragged(axis):
            dst_size = dst_shape.dimension_size(axis)
            rt_input = _ragged_tile_axis(rt_input, axis, dst_size)

    return rt_input
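
This machinery is what lets elementwise ops broadcast dense operands against ragged ones. A small end-to-end sketch using the public API (assumes a TF build with `tf.ragged`; the values are illustrative):

import tensorflow as tf

rt = tf.ragged.constant([[1, 2, 3], [4]])
# A scalar broadcasts into every ragged row.
print(rt + 10)  # <tf.RaggedTensor [[11, 12, 13], [14]]>
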
def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
    """Assert the condition `x == y` holds element-wise.

  Example of adding a dependency to an operation:

  ```python
  with tf.control_dependencies([tf.assert_equal(x, y)]):
    output = tf.reduce_sum(x)
  ```

  This condition holds if for every pair of (possibly broadcast) elements
  `x[i]`, `y[i]`, we have `x[i] == y[i]`.
  If both `x` and `y` are empty, this is trivially satisfied.

  Args:
    x:  Numeric `Tensor`.
    y:  Numeric `Tensor`, same dtype as and broadcastable to `x`.
    data:  The tensors to print out if the condition is False.  Defaults to
      error message and first few entries of `x`, `y`.
    summarize: Print this many entries of each tensor.
    message: A string to prefix to the default message.
    name: A name for this operation (optional).  Defaults to "assert_equal".

  Returns:
    Op that raises `InvalidArgumentError` if `x == y` is False.

    @compatibility(eager)
    returns None
    @end_compatibility

  Raises:
    InvalidArgumentError: if the check can be performed immediately and
      `x == y` is False. The check can be performed immediately during eager
      execution or if `x` and `y` are statically known.
  """
    message = message or ''
    with ops.name_scope(name, 'assert_equal', [x, y, data]):
        x = ops.convert_to_tensor(x, name='x')
        y = ops.convert_to_tensor(y, name='y')

        if context.in_eager_mode():
            eq = math_ops.equal(x, y)
            condition = math_ops.reduce_all(eq)
            if not condition:
                # Prepare a message with first elements of x and y.
                summary_msg = ''
                # Default to printing 3 elements like control_flow_ops.Assert (used
                # by graph mode) does.
                summarize = 3 if summarize is None else summarize
                if summarize:
                    # reshape((-1,)) is the fastest way to get a flat array view.
                    x_np = x.numpy().reshape((-1, ))
                    y_np = y.numpy().reshape((-1, ))
                    x_sum = min(x_np.size, summarize)
                    y_sum = min(y_np.size, summarize)
                    summary_msg = ('First %d elements of x:\n%s\n'
                                   'First %d elements of y:\n%s\n' %
                                   (x_sum, x_np[:x_sum], y_sum, y_np[:y_sum]))

                # Get the values that actually differed and their indices.
                mask = math_ops.logical_not(eq)
                indices = array_ops.where(mask)
                indices_np = indices.numpy()
                x_vals = array_ops.boolean_mask(x, mask)
                y_vals = array_ops.boolean_mask(y, mask)
                summarize = min(summarize, indices_np.shape[0])

                raise errors.InvalidArgumentError(
                    node_def=None,
                    op=None,
                    message=('%s\nCondition x == y did not hold.\n'
                             'Indices of first %s different values:\n%s\n'
                             'Corresponding x values:\n%s\n'
                             'Corresponding y values:\n%s\n'
                             '%s' %
                             (message or '', summarize, indices_np[:summarize],
                              x_vals.numpy().reshape(
                                  (-1, ))[:summarize], y_vals.numpy().reshape(
                                      (-1, ))[:summarize], summary_msg)))
            return

        if data is None:
            data = [
                message, 'Condition x == y did not hold element-wise:',
                'x (%s) = ' % x.name, x,
                'y (%s) = ' % y.name, y
            ]
        condition = math_ops.reduce_all(math_ops.equal(x, y))
        x_static = tensor_util.constant_value(x)
        y_static = tensor_util.constant_value(y)
        if x_static is not None and y_static is not None:
            condition_static = (x_static == y_static).all()
            _assert_static(condition_static, data)
        return control_flow_ops.Assert(condition, data, summarize=summarize)
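
A hedged usage sketch with the public API (not part of this module): in eager mode the check runs immediately, so a mismatch raises `InvalidArgumentError` on the spot rather than returning an op.

import tensorflow as tf

tf.debugging.assert_equal([1, 2], [1, 2])  # passes; returns None in eager
try:
    tf.debugging.assert_equal([1, 2], [1, 3], summarize=3)
except tf.errors.InvalidArgumentError as e:
    print('mismatch reported:', type(e).__name__)
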
Example #42
def build_collective_gather_indexed_slices(input_slices_list,
                                           devices,
                                           group_size,
                                           collective_keys,
                                           communication_hint='AUTO',
                                           control_inputs=None,
                                           timeout=None):
    """Build a subgraph that all-gathers IndexedSlices using the collective Op.

  This method must be called in graph mode or inside a tf.function.

  Args:
    input_slices_list: a list of IndexedSlices within a single worker graph that
      are to be gathered together; must be one per device.
    devices: a list of device strings to run the collective on.
    group_size: total number of devices globally that will be doing this same
      gathering. The gathering will actually include the corresponding tensors
      at all these workers.
    collective_keys: a CollectiveKeys object.
    communication_hint: string providing hint to runtime for choosing collective
      implementation.
    control_inputs: if not None, add control edges between control_inputs and
      (index-wise) corresponding collective_reduce tensors
    timeout: a float or None. The timeout in seconds.

  Returns:
    An array of final IndexedSlices, one per device, computed by the full
    gather.

  Raises:
    ValueError: if control_inputs is not None and doesn't match the length and
      devices of inputs.
  """
    assert not context.executing_eagerly(), (
        'build_collective_gather_indexed_slices can only be called in graph mode'
        ' or inside tf.function')
    if len(input_slices_list) != len(devices):
        raise ValueError(
            'collective requires one input IndexedSlice for each device, %d != %d'
            % (len(input_slices_list), len(devices)))

    if group_size < 2:
        return input_slices_list

    group_key = collective_keys.get_group_key(devices)
    gather_length_key = collective_keys.get_op_instance_key()
    gather_indices_key = collective_keys.get_op_instance_key()
    gather_values_key = collective_keys.get_op_instance_key()
    reduce_densified_key = collective_keys.get_op_instance_key()

    # Current CollectiveAllGather implementations require input IndexedSlices to
    # have consistent length across the board, we handle the reduction of
    # IndexedSlices as follows:
    #   1. Gather the lengths of IndexedSlices from all participants.
    #   2. If they have consistent length, apply all_gather.
    #   3. Otherwise convert IndexedSlices to dense tensors and apply
    #      all_reduce.
    out_slices_list = []
    for idx, input_slices in enumerate(input_slices_list):
        # pylint: disable=cell-var-from-loop
        with ops.device(devices[idx]):

            def all_gather():
                """Use all_gather to aggregate `IndexedSlices`."""
                all_values = collective_ops.all_gather(input_slices.values,
                                                       group_size,
                                                       group_key,
                                                       gather_values_key,
                                                       communication_hint,
                                                       timeout=timeout)
                # Add control dependency to order the all-gather.
                control = [all_values] if communication_hint == 'NCCL' else []
                with ops.control_dependencies(control):
                    all_indices = collective_ops.all_gather(
                        input_slices.indices,
                        group_size,
                        group_key,
                        gather_indices_key,
                        communication_hint,
                        timeout=timeout)
                return ops.IndexedSlices(values=all_values,
                                         indices=all_indices,
                                         dense_shape=input_slices.dense_shape)

            def densify_and_all_reduce():
                """Use all_reduce to aggregate `IndexedSlices`."""
                densified = ops.convert_to_tensor(input_slices)
                reduced = collective_ops.all_reduce(densified,
                                                    group_size,
                                                    group_key,
                                                    reduce_densified_key,
                                                    'Add',
                                                    'Id', [0],
                                                    communication_hint,
                                                    timeout=timeout)
                # We have to convert dense grad to IndexedSlice because all_reduce()
                # and all_gather() must have the same return type as required by
                # control_flow_ops.cond.
                return ops.IndexedSlices(values=reduced,
                                         indices=math_ops.range(
                                             array_ops.shape(reduced)[0]),
                                         dense_shape=input_slices.dense_shape)

            length = array_ops.shape(input_slices.indices)
            with ops.control_dependencies(
                    _control_input(input_slices, control_inputs, idx)):
                all_lengths = collective_ops.all_gather(length,
                                                        group_size,
                                                        group_key,
                                                        gather_length_key,
                                                        communication_hint,
                                                        timeout=timeout)
            out_slices = control_flow_ops.cond(
                math_ops.equal(math_ops.reduce_max(all_lengths),
                               math_ops.reduce_min(all_lengths)), all_gather,
                densify_and_all_reduce)
            out_slices_list.append(out_slices)
        # pylint: enable=cell-var-from-loop
    return out_slices_list
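
The densify fallback is what keeps both `cond` branches type-compatible. A standalone sketch with the public API (illustrative values; not the collective path itself): densify the `IndexedSlices`, then re-wrap the dense result with full row indices, the same shape of result `densify_and_all_reduce` returns.

import tensorflow as tf

slices = tf.IndexedSlices(values=tf.constant([[1., 1.], [2., 2.]]),
                          indices=tf.constant([0, 3]),
                          dense_shape=tf.constant([4, 2]))
dense = tf.convert_to_tensor(slices)  # densify: zero rows at indices 1 and 2
rewrapped = tf.IndexedSlices(values=dense,
                             indices=tf.range(tf.shape(dense)[0]),
                             dense_shape=slices.dense_shape)
print(dense.numpy())
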
Example #43
def stratified_sample(tensors,
                      labels,
                      target_probs,
                      batch_size,
                      init_probs=None,
                      enqueue_many=False,
                      queue_capacity=16,
                      threads_per_queue=1,
                      name=None):
  """Stochastically creates batches based on per-class probabilities.

  This method discards examples. Internally, it creates one queue to amortize
  the cost of disk reads, and one queue to hold the properly-proportioned
  batch.

  Args:
    tensors: List of tensors for data. All tensors are either one item or a
        batch, according to enqueue_many.
    labels: Tensor for label of data. Label is a single integer or a batch,
        depending on `enqueue_many`. It is not a one-hot vector.
    target_probs: Target class proportions in batch. An object whose type has a
        registered Tensor conversion function.
    batch_size: Size of batch to be returned.
    init_probs: Class proportions in the data. An object whose type has a
        registered Tensor conversion function, or `None` for estimating the
        initial distribution.
    enqueue_many: Bool. If true, interpret input tensors as having a batch
        dimension.
    queue_capacity: Capacity of the large queue that holds input examples.
    threads_per_queue: Number of threads for the large queue that holds input
        examples and for the final queue with the proper class proportions.
    name: Optional prefix for ops created by this function.
  Raises:
    ValueError: If `tensors` isn't iterable.
    ValueError: `enqueue_many` is True and labels doesn't have a batch
        dimension, or if `enqueue_many` is False and labels isn't a scalar.
    ValueError: `enqueue_many` is True, and batch dimension on data and labels
        don't match.
    ValueError: if probs don't sum to one.
    ValueError: if a zero initial probability class has a nonzero target
        probability.
    TFAssertion: if labels aren't integers in [0, num classes).
  Returns:
    (data_batch, label_batch), where data_batch is a list of tensors of the same
        length as `tensors`

  Example:
    # Get tensor for a single data and label example.
    data, label = data_provider.Get(['data', 'label'])

    # Get stratified batch according to per-class probabilities.
    target_probs = [...distribution you want...]
    [data_batch], labels = tf.contrib.training.stratified_sample(
        [data], label, target_probs)

    # Run batch through network.
    ...
  """
  with ops.name_scope(name, 'stratified_sample', list(tensors) + [labels]):
    tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors)
    labels = ops.convert_to_tensor(labels)
    target_probs = ops.convert_to_tensor(target_probs, dtype=dtypes.float32)
    # Reduce the case of a single example to that of a batch of size 1.
    if not enqueue_many:
      tensor_list = [array_ops.expand_dims(tensor, 0) for tensor in tensor_list]
      labels = array_ops.expand_dims(labels, 0)

    # If `init_probs` is `None`, set up online estimation of data distribution.
    if init_probs is None:
      # We use `target_probs` to get the number of classes, so its shape must be
      # fully defined at graph construction time.
      target_probs.get_shape().assert_is_fully_defined()
      init_probs = _estimate_data_distribution(
          labels, target_probs.get_shape().num_elements())
    else:
      init_probs = ops.convert_to_tensor(init_probs, dtype=dtypes.float32)

    # Validate that input is consistent.
    tensor_list, labels, [init_probs, target_probs] = _verify_input(
        tensor_list, labels, [init_probs, target_probs])

    # Check that all zero initial probabilities also have zero target
    # probabilities.
    assert_op = control_flow_ops.Assert(
        math_ops.reduce_all(
            math_ops.logical_or(
                math_ops.not_equal(init_probs, 0),
                math_ops.equal(target_probs, 0))),
        ['All classes with zero initial probability must also have zero target '
         'probability: ', init_probs, target_probs
        ])
    init_probs = control_flow_ops.with_dependencies([assert_op], init_probs)

    # Calculate acceptance sampling probabilities.
    accept_probs = _calculate_acceptance_probabilities(init_probs, target_probs)
    proportion_rejected = math_ops.reduce_sum((1 - accept_probs) * init_probs)
    accept_probs = control_flow_ops.cond(
        math_ops.less(proportion_rejected, .5),
        lambda: accept_probs,
        lambda: logging_ops.Print(  # pylint: disable=g-long-lambda
            accept_probs, [accept_probs],
            message='Proportion of examples rejected by sampler is high.',
            first_n=10))

    # Make a single queue to hold input examples. Reshape output so examples
    # don't have singleton batch dimension.
    batched = input_ops.batch(
        tensor_list + [labels],
        batch_size=1,
        num_threads=threads_per_queue,
        capacity=queue_capacity,
        enqueue_many=True)
    val_list = [array_ops.squeeze(x, [0]) for x in batched[:-1]]
    label = array_ops.squeeze(batched[-1], [0])

    # Set up second queue containing batches that have the desired class
    # proportions.
    cur_prob = array_ops.gather(accept_probs, label)
    batched = input_ops.maybe_batch(
        val_list + [label],
        keep_input=random_ops.random_uniform([]) < cur_prob,
        batch_size=batch_size,
        num_threads=threads_per_queue)
    return batched[:-1], batched[-1]
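
The acceptance probabilities are not computed in this snippet, but one standard construction (a hedged NumPy sketch; `_calculate_acceptance_probabilities` itself is not shown here) scales the target/init ratios so the largest becomes 1, then keeps an example of class i with probability ratio_i:

import numpy as np

init_probs = np.array([0.7, 0.2, 0.1])    # class proportions in the data
target_probs = np.array([1/3, 1/3, 1/3])  # desired batch proportions

ratios = target_probs / init_probs
accept_probs = ratios / ratios.max()
print(accept_probs)  # rarest class always kept; common classes thinned
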
def assert_broadcastable(weights, values):
    """Asserts `weights` can be broadcast to `values`.

  In `tf.losses` and `tf.metrics`, we support limited weight broadcasting. We
  let weights be either scalar, or the same rank as the target values, with each
  dimension either 1, or the same as the corresponding values dimension.

  Args:
    weights: `Tensor` of weights.
    values: `Tensor` of values to which weights are applied.

  Returns:
    `Operation` raising `InvalidArgumentError` if `weights` has incorrect shape.
    `no_op` if static checks determine `weights` has correct shape.

  Raises:
    ValueError:  If static checks determine `weights` has incorrect shape.
  """
    with ops.name_scope(None, "assert_broadcastable",
                        (weights, values)) as scope:
        with ops.name_scope(None, "weights", (weights, )) as weights_scope:
            weights = ops.convert_to_tensor(weights, name=weights_scope)
            weights_shape = array_ops.shape(weights, name="shape")
            weights_rank = array_ops.rank(weights, name="rank")
        weights_rank_static = tensor_util.constant_value(weights_rank)

        with ops.name_scope(None, "values", (values, )) as values_scope:
            values = ops.convert_to_tensor(values, name=values_scope)
            values_shape = array_ops.shape(values, name="shape")
            values_rank = array_ops.rank(values, name="rank")
        values_rank_static = tensor_util.constant_value(values_rank)

        # Try static checks.
        if weights_rank_static is not None and values_rank_static is not None:
            if weights_rank_static == 0:
                return control_flow_ops.no_op(
                    name="static_scalar_check_success")
            if weights_rank_static != values_rank_static:
                raise ValueError(
                    "%s values.rank=%s. weights.rank=%s."
                    " values.shape=%s. weights.shape=%s." %
                    (_ASSERT_BROADCASTABLE_ERROR_PREFIX, values_rank_static,
                     weights_rank_static, values.shape, weights.shape))
            weights_shape_static = tensor_util.constant_value(weights_shape)
            values_shape_static = tensor_util.constant_value(values_shape)
            if weights_shape_static is not None and values_shape_static is not None:
                # Sanity check, this should always be true since we checked rank above.
                ndims = len(values_shape_static)
                assert ndims == len(weights_shape_static)

                for i in range(ndims):
                    if weights_shape_static[i] not in (1,
                                                       values_shape_static[i]):
                        raise ValueError(
                            "%s Mismatch at dim %s. values.shape=%s weights.shape=%s."
                            % (_ASSERT_BROADCASTABLE_ERROR_PREFIX, i,
                               values_shape_static, weights_shape_static))
                return control_flow_ops.no_op(name="static_dims_check_success")

        # Dynamic checks.
        is_scalar = math_ops.equal(0, weights_rank, name="is_scalar")
        data = (
            _ASSERT_BROADCASTABLE_ERROR_PREFIX,
            "weights.shape=",
            weights.name,
            weights_shape,
            "values.shape=",
            values.name,
            values_shape,
            "is_scalar=",
            is_scalar,
        )
        is_valid_shape = control_flow_ops.cond(
            is_scalar,
            lambda: is_scalar,
            lambda: _has_valid_nonscalar_shape(  # pylint: disable=g-long-lambda
                weights_rank, weights_shape, values_rank, values_shape),
            name="is_valid_shape")
        return control_flow_ops.Assert(is_valid_shape, data, name=scope)
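
A plain-Python restatement of the shape rule enforced above (illustrative helper, not part of the module): weights must be scalar, or have the same rank as values with every dimension either 1 or equal to the corresponding values dimension.

def weights_broadcastable(weights_shape, values_shape):
    if len(weights_shape) == 0:  # scalar weights always broadcast
        return True
    if len(weights_shape) != len(values_shape):
        return False
    return all(w in (1, v) for w, v in zip(weights_shape, values_shape))

assert weights_broadcastable((), (2, 3))
assert weights_broadcastable((2, 1), (2, 3))
assert not weights_broadcastable((3,), (2, 3))  # rank mismatch
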
def tfassert_eq(_):
    x = array_ops.placeholder(dtypes.int32, name='x_hold')
    y = array_ops.placeholder(dtypes.int32, name='y_hold')
    control_flow_ops.Assert(math_ops.equal(x, y), ['Expected x == y.'],
                            name='assert_eq')
    math_ops.add(x, math_ops.negative(y), name='x_y_diff')
Example #46
def npairs_loss(labels,
                embeddings_anchor,
                embeddings_positive,
                reg_lambda=3e-3,
                print_losses=False):
    """Computes the npairs loss.
          Npairs loss expects paired data where a pair is composed of samples from the
          same labels and each pairs in the minibatch have different labels. The loss
          has two components. The first component is the L2 regularizer on the
          embedding vectors. The second component is the sum of cross entropy loss
          which takes each row of the pair-wise similarity matrix as logits and
          the remapped one-hot labels as labels.
          See:
          http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf
          Args:
            labels: 1-D tf.int32 `Tensor` of shape [batch_size/2].
            embeddings_anchor: 2-D Tensor of shape [batch_size/2, embedding_dim] for the
              embedding vectors for the anchor images. Embeddings should not be
              l2 normalized.
            embeddings_positive: 2-D Tensor of shape [batch_size/2, embedding_dim] for the
              embedding vectors for the positive images. Embeddings should not be
              l2 normalized.
            reg_lambda: Float. L2 regularization term on the embedding vectors.
            print_losses: Boolean. Option to print the xent and l2loss.
          Returns:
            npairs_loss: tf.float32 scalar.
      """
    # Add the regularizer on the embedding.
    reg_anchor = math_ops.reduce_mean(
        math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1))
    reg_positive = math_ops.reduce_mean(
        math_ops.reduce_sum(math_ops.square(embeddings_positive), 1))
    l2loss = math_ops.multiply(0.25 * reg_lambda,
                               reg_anchor + reg_positive,
                               name='l2loss')

    # Get per pair similarities.
    similarity_matrix = math_ops.matmul(embeddings_anchor,
                                        embeddings_positive,
                                        transpose_a=False,
                                        transpose_b=True)

    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    labels_remapped = math_ops.to_float(
        math_ops.equal(labels, array_ops.transpose(labels)))
    labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keep_dims=True)

    # Add the softmax loss.
    xent_loss = nn.softmax_cross_entropy_with_logits(logits=similarity_matrix,
                                                     labels=labels_remapped)
    xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy')

    if print_losses:
        xent_loss = logging_ops.Print(
            xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss])

    return l2loss + xent_loss
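
The label remapping is the subtle step; a NumPy sketch with illustrative labels: each row of the similarity matrix is treated as logits, and the soft target for row i spreads probability mass uniformly over the columns j with labels[j] == labels[i].

import numpy as np

labels = np.array([0, 1, 0, 2])
remapped = (labels[:, None] == labels[None, :]).astype(np.float32)
remapped /= remapped.sum(axis=1, keepdims=True)
print(remapped)
# Row 0 puts 0.5 on columns 0 and 2; row 1 puts 1.0 on column 1; etc.
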
Example #48
def triplet_semihard_loss(labels, embeddings, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.
      The loss encourages the positive distances (between a pair of embeddings with
      the same labels) to be smaller than the minimum negative distance among
      which are at least greater than the positive distance plus the margin constant
      (called semi-hard negative) in the mini-batch. If no such negative exists,
      uses the largest negative distance instead.
      See: https://arxiv.org/abs/1503.03832.
      Args:
        labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
          multiclass integer labels.
        embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
          be l2 normalized.
        margin: Float, margin term in the loss definition.
      Returns:
        triplet_loss: tf.float32 scalar.
    """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    # Compute the mask.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keep_dims=True), 0.0),
        [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply by 0.5 for the upper triangular;
    #   in semihard, all positive pairs except the diagonal are used.
    num_positives = math_ops.reduce_sum(mask_positives)

    _triplet_loss = math_ops.truediv(math_ops.reduce_sum(
        math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)),
                                     num_positives,
                                     name='triplet_semihard_loss')
    return _triplet_loss
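
A NumPy sketch of semi-hard selection for a single anchor-positive pair (illustrative distances only): among negatives farther away than the positive, take the closest; if none exists, fall back to the farthest negative.

import numpy as np

d_ap = 0.8                        # anchor-positive distance
d_an = np.array([0.5, 0.9, 1.4])  # anchor-negative distances
outside = d_an[d_an > d_ap]       # negatives_outside for this pair
chosen = outside.min() if outside.size else d_an.max()
print(chosen)  # 0.9: smallest negative distance that exceeds d_ap
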
 def equal_(x, y):
     if _is_tensor(x) or _is_tensor(y):
         return math_ops.equal(x, y)
     else:
         return x == y
Example #50
    def posterior_from_prior_state(self, prior_state, prior_state_var,
                                   observation, observation_model,
                                   predicted_observations, observation_noise):
        """Compute a posterior over states given an observation.

    Args:
      prior_state: Prior state mean [batch size x state dimension]
      prior_state_var: Prior state covariance [batch size x state dimension x
          state dimension]
      observation: The observed value corresponding to the predictions given
          [batch size x observation dimension]
      observation_model: The [batch size x observation dimension x model state
          dimension] Tensor indicating how a particular state is mapped to
          (pre-noise) observations for each part of the batch.
      predicted_observations: An (observation mean, observation variance) tuple
          computed based on the current state, usually the output of
          observed_from_state.
      observation_noise: A [batch size x observation dimension x observation
          dimension] or [observation dimension x observation dimension] Tensor
          with covariance matrices to use for each part of the batch (a
          two-dimensional input will be broadcast).
    Returns:
      Posterior mean and covariance (dimensions matching the first two
      arguments).

    """
        observed_mean, observed_var = predicted_observations
        residual = observation - observed_mean
        # TODO(allenl): Can more of this be done using matrix_solve_ls?
        kalman_solve_rhs = math_ops.matmul(observation_model,
                                           prior_state_var,
                                           adjoint_b=True)
        # This matrix_solve adjoint doesn't make a difference symbolically (since
        # observed_var is a covariance matrix, and should be symmetric), but
        # filtering on multivariate series is unstable without it. See
        # test_multivariate_symmetric_covariance_float64 in kalman_filter_test.py
        # for an example of the instability (fails with adjoint=False).
        kalman_gain_transposed = linalg_ops.matrix_solve(matrix=observed_var,
                                                         rhs=kalman_solve_rhs,
                                                         adjoint=True)
        posterior_state = prior_state + array_ops.squeeze(math_ops.matmul(
            kalman_gain_transposed,
            array_ops.expand_dims(residual, -1),
            adjoint_a=True),
                                                          squeeze_dims=[-1])
        gain_obs = math_ops.matmul(kalman_gain_transposed,
                                   observation_model,
                                   adjoint_a=True)
        identity_extradim = linalg_ops.eye(array_ops.shape(gain_obs)[1],
                                           dtype=gain_obs.dtype)[None]
        identity_minus_factor = identity_extradim - gain_obs
        if self._simplified_posterior_covariance_computation:
            # posterior covariance =
            #   (I - kalman_gain * observation_model) * prior_state_var
            posterior_state_var = math_ops.matmul(identity_minus_factor,
                                                  prior_state_var)
        else:
            observation_noise = ops.convert_to_tensor(observation_noise)
            # A Joseph form update, which provides better numeric stability than the
            # simplified optimal Kalman gain update, at the cost of a few extra
            # operations. Joseph form updates are valid for any gain (not just the
            # optimal Kalman gain), and so are more forgiving of numerical errors in
            # computing the optimal Kalman gain.
            #
            # posterior covariance =
            #   (I - kalman_gain * observation_model) * prior_state_var
            #     * (I - kalman_gain * observation_model)^T
            #   + kalman_gain * observation_noise * kalman_gain^T
            left_multiplied_state_var = math_ops.matmul(
                identity_minus_factor, prior_state_var)
            multiplied_state_var = math_ops.matmul(identity_minus_factor,
                                                   left_multiplied_state_var,
                                                   adjoint_b=True)

            def _batch_observation_noise_update():
                return (multiplied_state_var + math_ops.matmul(
                    math_ops.matmul(kalman_gain_transposed,
                                    observation_noise,
                                    adjoint_a=True), kalman_gain_transposed))

            def _matrix_observation_noise_update():
                return (multiplied_state_var + math_ops.matmul(
                    math_utils.batch_times_matrix(
                        kalman_gain_transposed, observation_noise, adj_x=True),
                    kalman_gain_transposed))

            if observation_noise.get_shape().ndims is None:
                posterior_state_var = control_flow_ops.cond(
                    math_ops.equal(array_ops.rank(observation_noise),
                                   2), _matrix_observation_noise_update,
                    _batch_observation_noise_update)
            else:
                # If static shape information exists, it gets checked in each cond()
                # branch, so we need a special case to avoid graph-build-time
                # exceptions.
                if observation_noise.get_shape().ndims == 2:
                    posterior_state_var = _matrix_observation_noise_update()
                else:
                    posterior_state_var = _batch_observation_noise_update()
        return posterior_state, posterior_state_var
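
The Joseph-form update is standard Kalman-filter algebra; a NumPy sketch without the batch dimension (shapes and values illustrative):

import numpy as np

# posterior covariance = (I - K H) P (I - K H)^T + K R K^T
P = np.diag([1.0, 2.0])         # prior state covariance
H = np.array([[1.0, 0.0]])      # observation model
R = np.array([[0.5]])           # observation noise covariance
S = H @ P @ H.T + R             # innovation covariance
K = P @ H.T @ np.linalg.inv(S)  # Kalman gain
IKH = np.eye(2) - K @ H
P_post = IKH @ P @ IKH.T + K @ R @ K.T
print(P_post)
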
def lifted_struct_loss(y_true, y_preds, margin=1.0):
    """Computes the lifted structured loss.

    The loss encourages the positive distances (between a pair of embeddings
    with the same labels) to be smaller than any negative distances (between a
    pair of embeddings with different labels) in the mini-batch in a way
    that is differentiable with respect to the embedding vectors.
    See: https://arxiv.org/abs/1511.06452.

    Args:
      y_true: 1-D tf.int32 `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_preds: 2-D float `Tensor` of embedding vectors. Embeddings should not
        be l2 normalized.
      margin: Float, margin term in the loss definition.

    Returns:
      lifted_loss: tf.float32 scalar.
    """
    labels = y_true
    embeddings = y_preds
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    # assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pairwise_distances = pairwise_distance(embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    diff = margin - pairwise_distances
    mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    # Safe maximum: temporarily shift negative distances above zero before
    #   taking the max, so that the max is taken only among negatives.
    row_minimums = math_ops.reduce_min(diff, 1, keepdims=True)
    row_negative_maximums = (math_ops.reduce_max(
        math_ops.multiply(diff - row_minimums, mask), 1, keepdims=True) +
                             row_minimums)

    # Compute the loss.
    # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
    #   where m_i is the max of alpha - negative D_i's.
    # This matches the Caffe loss layer implementation at:
    #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long

    max_elements = math_ops.maximum(row_negative_maximums,
                                    array_ops.transpose(row_negative_maximums))
    diff_tiled = array_ops.tile(diff, [batch_size, 1])
    mask_tiled = array_ops.tile(mask, [batch_size, 1])
    max_elements_vect = array_ops.reshape(array_ops.transpose(max_elements),
                                          [-1, 1])

    loss_exp_left = array_ops.reshape(
        math_ops.reduce_sum(
            math_ops.multiply(math_ops.exp(diff_tiled - max_elements_vect),
                              mask_tiled),
            1,
            keepdims=True,
        ),
        [batch_size, batch_size],
    )

    loss_mat = max_elements + math_ops.log(loss_exp_left +
                                           array_ops.transpose(loss_exp_left))
    # Add the positive distance.
    loss_mat += pairwise_distances

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
    num_positives = math_ops.reduce_sum(mask_positives) / 2.0

    lifted_loss = math_ops.truediv(
        0.25 * math_ops.reduce_sum(
            math_ops.square(
                math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                                 0.0))),
        num_positives,
        name='liftedstruct_loss',
    )
    return lifted_loss
def indicators_to_sparse_ids(indicators, ignore_value=None, dtype=dtypes.int64):
  """Convert a dense indicator tensor to sparse IDs.

  This is commonly used for converting a dense classification label to sparse.
  In the following example, we have an input of shape (2, 2, num_classes),
  where num_classes=4.

  ```python
  indicators = [
    [
      [0, 0, 1, 0],
      [0, 0, 0, 0]
    ], [
      [1, 0, 1, 1],
      [0, 0, 1, 0]
    ]
  ]
  sparse_ids = indicators_to_sparse_ids(indicators)
  ```

  `sparse_ids` in "jagged" format:
  [
    [
      [2],
      []
    ], [
      [0, 2, 3],
      [2]
    ]
  ]

  `sparse_ids` in `SparseTensor` format:
  ```python
  {
    indices: [[0, 0, 1], [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 1, 0]],
    values: [2, 0, 2, 3, 2],
    dense_shape: [2, 2, 3]
  }
  ```

  Args:
    indicators: Dense `Tensor` of shape `(d0, ..., dn, num_classes)`.
      `ignore_value` values are ignored. For other values (typically, ones), the
      index along the last dimension is returned.
    ignore_value: Entries in `indicators` equal to this value will be
      absent from the returned `SparseTensor`. If `None`, default value of
      `indicators` dtype will be used (e.g. '' for `str`, 0 for `int`).
    dtype: Type of result, must be integer type.

  Returns:
    `SparseTensor` of type `dtype` and shape `(d0, ..., dn, max_num_labels)`,
      where `max_num_labels` is the maximum number of non-zero values in any
      row (in the example above, row (1, 1) has 3 non-zero values, so the result
      shape is (2, 2, 3)). The values of this `SparseTensor` are in the range
      `[0, num_classes)` and correspond to the index of non-ignore values along
      the last dimension of `indicators`.

  Raises:
    ValueError: if `dtype` is not integer.
  """
  if not dtype.is_integer:
    raise ValueError("Invalid dtype {} not integer.".format(dtype))
  with ops.name_scope(
      None, "indicators_to_sparse_ids", (indicators, ignore_value)):
    # Convert indicators to binary ones and zeros. We use int64 since
    # SparseTensor requires int64 indices.
    indicators = ops.convert_to_tensor(indicators, name="indicators")
    missing_indicators = math_ops.equal(
        indicators, _ignore_value_tensor(indicators.dtype, ignore_value),
        name="missing")
    zeros_like_indicators = array_ops.zeros_like(
        indicators, dtype=dtypes.int64, name="zeros")
    binary_indicators = array_ops.where(
        missing_indicators, zeros_like_indicators,
        array_ops.ones_like(indicators, dtype=dtypes.int64, name="ones"),
        name="binary_indicators")

    # Use cumsum along the last dimension to generate per-row indexes.
    # Note that these are 1-based (since 0 indicates missing values), so they're
    # off-by-1 from the actual indices. We'll subtract 1 below. Since they're
    # off-by-one, the max value is the size of the last dimension (i.e.,
    # last_index + 1).
    row_index_indicators = array_ops.where(
        missing_indicators, zeros_like_indicators,
        math_ops.cumsum(binary_indicators, axis=-1), "row_index_indicators")
    result_last_dim = array_ops.reshape(
        math_ops.reduce_max(row_index_indicators), shape=(1,),
        name="result_last_dim")

    # Convert to a SparseTensor. The values of this SparseTensor are the last
    # indices of our result, and the last indices of this SparseTensor (i.e.,
    # the class IDs indicated by `indicators`) are the values of our result, so
    # we use tensor slicing and concat to swap them.
    sparse_row_index_indicators = dense_to_sparse_tensor(
        row_index_indicators, ignore_value=0)
    return sparse_tensor.SparseTensor(
        indices=array_ops.concat((
            sparse_row_index_indicators.indices[:, :-1],
            array_ops.reshape(sparse_row_index_indicators.values - 1, (-1, 1))
        ), axis=1, name="indices"),
        values=math_ops.cast(
            sparse_row_index_indicators.indices[:, -1], dtype=dtype,
            name="values"),
        dense_shape=array_ops.concat(
            (sparse_row_index_indicators.dense_shape[0:-1], result_last_dim),
            axis=0, name="dense_shape"))
Example #53
def batch_matrix_pow(matrices, powers):
    """Compute powers of matrices, e.g. A^3 = matmul(matmul(A, A), A).

  Uses exponentiation by squaring, with O(log(p)) matrix multiplications to
  compute A^p.

  Args:
    matrices: [batch size x N x N]
    powers: Which integer power to raise each matrix to [batch size]
  Returns:
    The matrices raised to their respective powers, same dimensions as the
    "matrices" argument.
  """
    def terminate_when_all_zero(current_argument, residual_powers,
                                accumulator):
        del current_argument, accumulator  # not used for condition
        do_exit = math_ops.reduce_any(
            math_ops.greater(residual_powers,
                             array_ops.ones_like(residual_powers)))
        return do_exit

    def do_iteration(current_argument, residual_powers, accumulator):
        """Compute one step of iterative exponentiation by squaring.

    The recursive form is:
      power(A, p) = { power(matmul(A, A), p / 2) for even p
                    { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p
      power(A, 0) = I

    The power(A, 0) = I case is handled by starting with accumulator set to the
    identity matrix; matrices with zero residual powers are passed through
    unchanged.

    Args:
      current_argument: On this step, what is the first argument (A^2..^2) to
          the (unrolled) recursive function? [batch size x N x N]
      residual_powers: On this step, what is the second argument (residual p)?
          [batch_size]
      accumulator: Accumulates the exterior multiplications from the odd
          powers (initially the identity matrix). [batch_size x N x N]
    Returns:
      Updated versions of each argument for one step of the unrolled
      computation. Does not change parts of the batch which have a residual
      power of zero.
    """
        is_even = math_ops.equal(
            residual_powers % 2,
            array_ops.zeros(array_ops.shape(residual_powers),
                            dtype=dtypes.int32))
        new_accumulator = array_ops.where(
            is_even, accumulator, math_ops.matmul(accumulator,
                                                  current_argument))
        new_argument = math_ops.matmul(current_argument, current_argument)
        do_update = math_ops.greater(residual_powers, 1)
        new_residual_powers = residual_powers - residual_powers % 2
        new_residual_powers //= 2
        # Stop updating if we've reached our base case; some batch elements may
        # finish sooner than others
        accumulator = array_ops.where(do_update, new_accumulator, accumulator)
        current_argument = array_ops.where(do_update, new_argument,
                                           current_argument)
        residual_powers = array_ops.where(do_update, new_residual_powers,
                                          residual_powers)
        return (current_argument, residual_powers, accumulator)

    matrices = ops.convert_to_tensor(matrices)
    powers = math_ops.cast(powers, dtype=dtypes.int32)
    ident = array_ops.expand_dims(
        array_ops.diag(
            array_ops.ones([array_ops.shape(matrices)[1]],
                           dtype=matrices.dtype)), 0)
    ident_tiled = array_ops.tile(ident, [array_ops.shape(matrices)[0], 1, 1])
    (final_argument,
     final_residual_power, final_accumulator) = control_flow_ops.while_loop(
         terminate_when_all_zero, do_iteration,
         [matrices, powers, ident_tiled])
    return array_ops.where(
        math_ops.equal(
            final_residual_power,
            array_ops.zeros_like(final_residual_power, dtype=dtypes.int32)),
        ident_tiled, math_ops.matmul(final_argument, final_accumulator))
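
A plain-Python sketch of the exponentiation-by-squaring recursion the while_loop above unrolls (illustrative, single matrix instead of a batch; O(log p) matrix multiplications):

import numpy as np

def matrix_power(a, p):
    acc = np.eye(a.shape[0])
    while p > 0:
        if p % 2:        # odd power: fold one factor into the accumulator
            acc = acc @ a
        a = a @ a        # square the base
        p //= 2
    return acc

a = np.array([[1.0, 1.0], [0.0, 1.0]])
print(matrix_power(a, 3))  # equals a @ a @ a
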
 def op(self):
   """Returns the cluster initializer op."""
   return control_flow_ops.cond(
       math_ops.equal(self._num_remaining, 0),
       lambda: check_ops.assert_equal(self._cluster_centers_initialized, True),
       self._initialize)
Example #55
 def _introspect_ndims(self, ndims):
     """Helper to establish some properties of input ndims args."""
     if self._is_all_constant_helper(ndims):
         return (tensor_util.constant_value(ndims),
                 tensor_util.constant_value(ndims) == 0)
     return None, math_ops.equal(ndims, 0)
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None,
                                      multi_label=False,
                                      label_weights=None):
    """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positives: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positives: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value or a python list or tuple of float thresholds in
      `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    label_weights: (optional) tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).

  Returns:
    Update op.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
    if multi_label and label_weights is not None:
        raise ValueError(
            '`label_weights` for multilabel data should be handled '
            'outside of `update_confusion_matrix_variables` when '
            '`multi_label` is True.')
    if variables_to_update is None:
        return
    y_true = math_ops.cast(y_true, dtype=dtypes.float32)
    y_pred = math_ops.cast(y_pred, dtype=dtypes.float32)
    if multi_label:
        thresh_shape = array_ops.shape(thresholds)
        num_thresholds = thresh_shape[0]
        one_thresh = math_ops.equal(math_ops.cast(1, dtype=dtypes.int32),
                                    array_ops.rank(thresholds),
                                    name='one_set_of_thresholds_cond')
    else:
        [y_pred, y_true], _ = ragged_assert_compatible_and_get_flat_values(
            [y_pred, y_true], sample_weight)
        num_thresholds = len(to_list(thresholds))
        one_thresh = math_ops.cast(True, dtype=dtypes.bool)
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    if not any(key
               for key in variables_to_update if key in list(ConfusionMatrix)):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: "{}". '
            'Received: "{}"'.format(list(ConfusionMatrix),
                                    variables_to_update.keys()))

    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            'Invalid keys: {}. Valid variable key options are: "{}"'.format(
                invalid_keys, list(ConfusionMatrix)))

    with ops.control_dependencies([
            check_ops.assert_greater_equal(y_pred,
                                           math_ops.cast(0.0,
                                                         dtype=y_pred.dtype),
                                           message='predictions must be >= 0'),
            check_ops.assert_less_equal(y_pred,
                                        math_ops.cast(1.0, dtype=y_pred.dtype),
                                        message='predictions must be <= 1')
    ]):
        if sample_weight is None:
            y_pred, y_true = tf_losses_utils.squeeze_or_expand_dimensions(
                y_pred, y_true)
        else:
            y_pred, y_true, sample_weight = (
                tf_losses_utils.squeeze_or_expand_dimensions(
                    y_pred, y_true, sample_weight=sample_weight))

    if top_k is not None:
        y_pred = _filter_top_k(y_pred, top_k)
    if class_id is not None:
        y_true = y_true[..., class_id]
        y_pred = y_pred[..., class_id]

    pred_shape = array_ops.shape(y_pred)
    num_predictions = pred_shape[0]
    if y_pred.shape.ndims == 1:
        num_labels = 1
    else:
        num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0)
    thresh_label_tile = control_flow_ops.cond(
        one_thresh, lambda: num_labels,
        lambda: math_ops.cast(1, dtype=dtypes.int32))

    # Reshape predictions and labels, adding a dim for thresholding.
    if multi_label:
        predictions_extra_dim = array_ops.expand_dims(y_pred, 0)
        labels_extra_dim = array_ops.expand_dims(
            math_ops.cast(y_true, dtype=dtypes.bool), 0)
    else:
        # Flatten predictions and labels when not multilabel.
        predictions_extra_dim = array_ops.reshape(y_pred, [1, -1])
        labels_extra_dim = array_ops.reshape(
            math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

    # Tile the thresholds for every prediction.
    if multi_label:
        thresh_pretile_shape = [num_thresholds, 1, -1]
        thresh_tiles = [1, num_predictions, thresh_label_tile]
        data_tiles = [num_thresholds, 1, 1]
    else:
        thresh_pretile_shape = [num_thresholds, -1]
        thresh_tiles = [1, num_predictions * num_labels]
        data_tiles = [num_thresholds, 1]

    thresh_tiled = array_ops.tile(
        array_ops.reshape(array_ops.constant(thresholds, dtype=dtypes.float32),
                          thresh_pretile_shape), array_ops.stack(thresh_tiles))
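    # Shape note (illustrative, not in the original source): in the
    # single-label case with 3 thresholds and 4 flattened predictions,
    # thresh_tiled has shape [3, 4], where row i repeats thresholds[i] across
    # every prediction, so the single greater() below compares every
    # prediction against every threshold at once.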

    # Tile the predictions for every threshold.
    preds_tiled = array_ops.tile(predictions_extra_dim, data_tiles)

    # Compare predictions and threshold.
    pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = array_ops.tile(labels_extra_dim, data_tiles)

    if sample_weight is not None:
        sample_weight = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred)
        weights_tiled = array_ops.tile(
            array_ops.reshape(sample_weight, thresh_tiles), data_tiles)
    else:
        weights_tiled = None

    if label_weights is not None and not multi_label:
        label_weights = array_ops.expand_dims(label_weights, 0)
        label_weights = weights_broadcast_ops.broadcast_weights(
            label_weights, y_pred)
        label_weights_tiled = array_ops.tile(
            array_ops.reshape(label_weights, thresh_tiles), data_tiles)
        if weights_tiled is None:
            weights_tiled = label_weights_tiled
        else:
            weights_tiled = math_ops.multiply(weights_tiled,
                                              label_weights_tiled)

    update_ops = []

    def weighted_assign_add(label, pred, weights, var):
        label_and_pred = math_ops.cast(math_ops.logical_and(label, pred),
                                       dtype=dtypes.float32)
        if weights is not None:
            label_and_pred *= weights
        return var.assign_add(math_ops.reduce_sum(label_and_pred, 1))
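    # Note (added for clarity): reduce_sum over axis 1 collapses the flattened
    # prediction axis, leaving one accumulated count per threshold (and per
    # label when multi_label), which matches the shape of the confusion-matrix
    # variables being updated.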

    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    if update_fn or update_tn:
        pred_is_neg = math_ops.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos,
                                                      pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = math_ops.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg,
                                                      pred_is_pos)
        if update_tn:
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg,
                                                         pred_is_neg)

    for matrix_cond, (label, pred) in loop_vars.items():
        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))

    return control_flow_ops.group(update_ops)
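# Hedged usage sketch (not part of the original source): accumulating true and
# false positives at three thresholds in TF1-style graph mode. Assumes
# `update_confusion_matrix_variables` and `ConfusionMatrix` from this module
# are importable; `tensorflow.compat.v1` is used purely for illustration.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
tp_var = tf.Variable([0., 0., 0.], trainable=False)  # one slot per threshold
fp_var = tf.Variable([0., 0., 0.], trainable=False)
update_op = update_confusion_matrix_variables(
    {ConfusionMatrix.TRUE_POSITIVES: tp_var,
     ConfusionMatrix.FALSE_POSITIVES: fp_var},
    y_true=tf.constant([0., 1., 1., 0.]),
    y_pred=tf.constant([0.1, 0.9, 0.6, 0.4]),
    thresholds=[0.25, 0.5, 0.75])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(update_op)
    # Expected counts per threshold: tp -> [2., 2., 1.], fp -> [1., 0., 0.]
    print(sess.run([tp_var, fp_var]))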
Example #57
def train(train_op,
          logdir,
          train_step_fn=train_step,
          train_step_kwargs=_USE_DEFAULT,
          log_every_n_steps=1,
          graph=None,
          master='',
          is_chief=True,
          global_step=None,
          number_of_steps=None,
          init_op=_USE_DEFAULT,
          init_feed_dict=None,
          local_init_op=_USE_DEFAULT,
          init_fn=None,
          ready_op=_USE_DEFAULT,
          summary_op=_USE_DEFAULT,
          save_summaries_secs=600,
          startup_delay_steps=0,
          saver=None,
          save_interval_secs=600,
          sync_optimizer=None,
          session_config=None):
    """Runs a training loop using a TensorFlow supervisor.

  When the sync_optimizer is supplied, gradient updates are applied
  synchronously. Otherwise, gradient updates are applied asynchronously.

  Args:
    train_op: A `Tensor` that, when executed, will apply the gradients and
      return the loss value.
    logdir: The directory where training logs are written. If None, model
      checkpoints and summaries will not be written.
    train_step_fn: The function to call in order to execute a single gradient
      step. The function must take exactly four arguments: the current
      session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary.
    train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By
      default, two `Boolean`, scalar ops called "should_stop" and "should_log"
      are provided.
    log_every_n_steps: The frequency, in terms of global steps, at which the
      loss and global step are logged.
    graph: The graph to pass to the supervisor. If no graph is supplied the
      default graph is used.
    master: The BNS name of the tensorflow master.
    is_chief: Specifies whether or not the training is being run by the primary
      replica during replica training.
    global_step: The `Tensor` representing the global step. If left as `None`,
      then slim.variables.get_or_create_global_step() is used.
    number_of_steps: The max number of gradient steps to take during training.
      If the value is left as None, training proceeds indefinitely.
    init_op: The initialization operation. If left to its default value, then
      the session is initialized by calling `tf.initialize_all_variables()`.
    init_feed_dict: A feed dictionary to use when executing the `init_op`.
    local_init_op: The local initialization operation. If left to its default
      value, then the session is initialized by calling
      `tf.initialize_local_variables()` and `tf.initialize_all_tables()`.
    init_fn: An optional callable to be executed after `init_op` is called. The
      callable must accept one argument, the session being initialized.
    ready_op: Operation to check if the model is ready to use. If left to its
      default value, then the session checks for readiness by calling
      `tf.report_uninitialized_variables()`.
    summary_op: The summary operation.
    save_summaries_secs: How often, in seconds, to save summaries.
    startup_delay_steps: The number of steps to wait for before beginning. Note
      that this must be 0 if a sync_optimizer is supplied.
    saver: Saver to save checkpoints. If None, a default one will be created
      and used.
    save_interval_secs: How often, in seconds, to save the model to `logdir`.
    sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the
      argument is supplied, gradient updates will be synchronous. If left as
      `None`, gradient updates will be asynchronous.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.

  Returns:
    The value of the loss function after training.

  Raises:
    ValueError: if `train_op` is `None`, if `startup_delay_steps` is
      non-zero when `sync_optimizer` is supplied, or if `number_of_steps` is
      not `None` and not positive.
  """
    if train_op is None:
        raise ValueError('train_op cannot be None.')

    if logdir is None:
        if summary_op != _USE_DEFAULT:
            raise ValueError('Cannot provide summary_op because logdir=None')
        if saver is not None:
            raise ValueError('Cannot provide saver because logdir=None')

    if sync_optimizer and startup_delay_steps > 0:
        raise ValueError(
            'startup_delay_steps must be zero when sync_optimizer is supplied.'
        )

    if number_of_steps is not None and number_of_steps <= 0:
        raise ValueError(
            '`number_of_steps` must be either None or a positive number.')

    graph = graph or ops.get_default_graph()
    with graph.as_default():
        if global_step is None:
            global_step = variables.get_or_create_global_step()
        saver = saver or tf_saver.Saver()

        if init_op == _USE_DEFAULT:
            init_op = tf_variables.initialize_all_variables()

        if ready_op == _USE_DEFAULT:
            ready_op = tf_variables.report_uninitialized_variables()

        if summary_op == _USE_DEFAULT:
            summary_op = logging_ops.merge_all_summaries()

        if local_init_op == _USE_DEFAULT:
            local_init_op = control_flow_ops.group(
                tf_variables.initialize_local_variables(),
                data_flow_ops.initialize_all_tables())

        cleanup_op = None

        if is_chief and sync_optimizer:
            if not isinstance(sync_optimizer,
                              sync_replicas_optimizer.SyncReplicasOptimizer):
                raise ValueError(
                    '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer'
                )

            # Need to create these BEFORE the supervisor finalizes the graph:
            with ops.control_dependencies([init_op]):
                init_tokens_op = sync_optimizer.get_init_tokens_op()
            init_op = init_tokens_op
            chief_queue_runner = sync_optimizer.get_chief_queue_runner()
            cleanup_op = sync_optimizer.get_clean_up_op()

        if train_step_kwargs == _USE_DEFAULT:
            train_step_kwargs = {}

            if number_of_steps:
                should_stop_op = math_ops.greater_equal(
                    global_step, number_of_steps)
            else:
                should_stop_op = constant_op.constant(False)
            train_step_kwargs['should_stop'] = should_stop_op
            train_step_kwargs['should_log'] = math_ops.equal(
                math_ops.mod(global_step, log_every_n_steps), 0)

    sv = supervisor.Supervisor(graph=graph,
                               is_chief=is_chief,
                               logdir=logdir,
                               init_op=init_op,
                               init_feed_dict=init_feed_dict,
                               local_init_op=local_init_op,
                               ready_op=ready_op,
                               summary_op=summary_op,
                               global_step=global_step,
                               saver=saver,
                               save_summaries_secs=save_summaries_secs,
                               save_model_secs=save_interval_secs,
                               init_fn=init_fn)

    should_retry = True
    while should_retry:
        try:
            should_retry = False
            with sv.managed_session(master,
                                    start_standard_services=False,
                                    config=session_config) as sess:
                logging.info('Starting Session.')
                if is_chief:
                    if logdir:
                        sv.start_standard_services(sess)
                elif startup_delay_steps > 0:
                    _wait_for_step(
                        sess, global_step,
                        min(startup_delay_steps, number_of_steps
                            or sys.maxsize))
                sv.start_queue_runners(sess)
                logging.info('Starting Queues.')
                if is_chief and sync_optimizer:
                    sv.start_queue_runners(sess, [chief_queue_runner])
                try:
                    while not sv.should_stop():
                        total_loss, should_stop = train_step_fn(
                            sess, train_op, global_step, train_step_kwargs)
                        if should_stop:
                            logging.info('Stopping Training.')
                            break
                    if logdir and sv.is_chief:
                        logging.info(
                            'Finished training! Saving model to disk.')
                        sv.saver.save(sess,
                                      sv.save_path,
                                      global_step=sv.global_step)
                except:
                    if sv.is_chief and cleanup_op is not None:
                        logging.info('About to execute sync_clean_up_op!')
                        sess.run(cleanup_op)
                    raise

        except errors.AbortedError:
            # Always re-run on AbortedError as it indicates a restart of one of the
            # distributed tensorflow servers.
            logging.info('Retrying training!')
            should_retry = True

    return total_loss
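# Hedged usage sketch (not part of the original source). `build_total_loss`
# is a hypothetical model-building helper; the rest uses TF1-era APIs that
# match this snippet's vintage.
import tensorflow.compat.v1 as tf

g = tf.Graph()
with g.as_default():
    total_loss = build_total_loss()  # hypothetical: returns a scalar loss
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    step = tf.train.get_or_create_global_step()
    minimize_op = optimizer.minimize(total_loss, global_step=step)
    # The docstring requires `train_op` to return the loss value when run,
    # so tie the minimize op to an identity of the loss:
    with tf.control_dependencies([minimize_op]):
        train_op = tf.identity(total_loss)

final_loss = train(train_op, logdir='/tmp/train_logs', graph=g,
                   number_of_steps=1000, log_every_n_steps=100)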
Example #58
  def training_graph(self,
                     input_data,
                     input_labels,
                     random_seed,
                     data_spec,
                     input_weights=None):
    """Constructs a TF graph for training a random tree.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      input_weights: A float tensor or placeholder holding per-input weights,
        or None if all inputs are to be weighted equally.

    Returns:
      The last op in the random tree training graph.
    """
    epoch = math_ops.to_int32(get_epoch_variable())

    if input_weights is None:
      input_weights = []

    sparse_indices = []
    sparse_values = []
    sparse_shape = []
    if isinstance(input_data, sparse_tensor.SparseTensor):
      sparse_indices = input_data.indices
      sparse_values = input_data.values
      sparse_shape = input_data.dense_shape
      input_data = []

    # Count extremely random stats.
    (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
     totals_indices, totals_sums, totals_squares,
     input_leaves) = (tensor_forest_ops.count_extremely_random_stats(
         input_data,
         sparse_indices,
         sparse_values,
         sparse_shape,
         data_spec,
         input_labels,
         input_weights,
         self.variables.tree,
         self.variables.tree_thresholds,
         self.variables.node_to_accumulator_map,
         self.variables.candidate_split_features,
         self.variables.candidate_split_thresholds,
         self.variables.start_epoch,
         epoch,
         num_classes=self.params.num_output_columns,
         regression=self.params.regression))
    node_update_ops = []
    node_update_ops.append(
        state_ops.assign_add(self.variables.node_sums, node_sums))

    splits_update_ops = []
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.candidate_split_sums,
                                           splits_indices, splits_sums))
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                           totals_indices, totals_sums))

    if self.params.regression:
      node_update_ops.append(state_ops.assign_add(self.variables.node_squares,
                                                  node_squares))
      splits_update_ops.append(
          tensor_forest_ops.scatter_add_ndim(
              self.variables.candidate_split_squares, splits_indices,
              splits_squares))
      splits_update_ops.append(
          tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_squares,
                                             totals_indices, totals_squares))

    # Sample inputs.
    update_indices, feature_updates, threshold_updates = (
        tensor_forest_ops.sample_inputs(
            input_data,
            sparse_indices,
            sparse_values,
            sparse_shape,
            input_weights,
            self.variables.node_to_accumulator_map,
            input_leaves,
            self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))
    update_features_op = state_ops.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = state_ops.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    with ops.control_dependencies(splits_update_ops):
      # Passing input_leaves to finished nodes here means that nodes that
      # have become stale won't be deallocated until an input reaches them,
      # because we're trying to avoid considering every fertile node for
      # performance reasons.
      finished, stale = tensor_forest_ops.finished_nodes(
          input_leaves,
          self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          self.variables.start_epoch,
          epoch,
          num_split_after_samples=self.params.split_after_samples,
          min_split_samples=self.params.min_split_samples,
          dominate_method=self.params.dominate_method,
          dominate_fraction=self.params.dominate_fraction)

    # Update leaf scores.
    # TODO(thomaswc): Store the leaf scores in a TopN and only update the
    # scores of the leaves that were touched by this batch of input.
    children = array_ops.squeeze(
        array_ops.slice(self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
    is_leaf = math_ops.equal(constants.LEAF_NODE, children)
    leaves = math_ops.to_int32(
        array_ops.squeeze(
            array_ops.where(is_leaf), squeeze_dims=[1]))
    non_fertile_leaves = array_ops.boolean_mask(
        leaves, math_ops.less(array_ops.gather(
            self.variables.node_to_accumulator_map, leaves), 0))
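    # Layout note (inferred from the ops above, not in the original source):
    # column 0 of `self.variables.tree` holds each node's child pointer, with
    # the sentinel `constants.LEAF_NODE` marking leaves; leaves mapped to a
    # negative accumulator id are non-fertile, i.e. not currently collecting
    # split statistics.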

    # TODO(gilberth): It should be possible to limit the number of non
    # fertile leaves we calculate scores for, especially since we can only take
    # at most array_ops.shape(finished)[0] of them.
    with ops.control_dependencies(node_update_ops):
      sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves)
      if self.params.regression:
        squares = array_ops.gather(self.variables.node_squares,
                                   non_fertile_leaves)
        non_fertile_leaf_scores = self._variance(sums, squares)
      else:
        non_fertile_leaf_scores = self._weighted_gini(sums)

    # Calculate best splits.
    with ops.control_dependencies(splits_update_ops):
      split_indices = tensor_forest_ops.best_splits(
          finished,
          self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          regression=self.params.regression)

    # Grow tree.
    with ops.control_dependencies([update_features_op, update_thresholds_op]):
      (tree_update_indices, tree_children_updates, tree_threshold_updates,
       new_eot) = (tensor_forest_ops.grow_tree(
           self.variables.end_of_tree, self.variables.node_to_accumulator_map,
           finished, split_indices, self.variables.candidate_split_features,
           self.variables.candidate_split_thresholds))
      tree_update_op = state_ops.scatter_update(
          self.variables.tree, tree_update_indices, tree_children_updates)
      thresholds_update_op = state_ops.scatter_update(
          self.variables.tree_thresholds, tree_update_indices,
          tree_threshold_updates)
      # TODO(thomaswc): Only update the epoch on the new leaves.
      new_epoch_updates = epoch * array_ops.ones_like(tree_threshold_updates,
                                                      dtype=dtypes.int32)
      epoch_update_op = state_ops.scatter_update(
          self.variables.start_epoch, tree_update_indices,
          new_epoch_updates)

    # Update fertile slots.
    with ops.control_dependencies([tree_update_op]):
      (n2a_map_updates, a2n_map_updates, accumulators_cleared,
       accumulators_allocated) = (tensor_forest_ops.update_fertile_slots(
           finished,
           non_fertile_leaves,
           non_fertile_leaf_scores,
           self.variables.end_of_tree,
           self.variables.accumulator_sums,
           self.variables.node_to_accumulator_map,
           stale,
           self.variables.node_sums,
           regression=self.params.regression))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    gated_new_eot, = control_flow_ops.tuple(
        [new_eot], control_inputs=[n2a_map_updates])
    eot_update_op = state_ops.assign(self.variables.end_of_tree, gated_new_eot)

    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(thresholds_update_op)
    updates.append(epoch_update_op)

    updates.append(
        state_ops.scatter_update(self.variables.node_to_accumulator_map,
                                 n2a_map_updates[0], n2a_map_updates[1]))

    updates.append(
        state_ops.scatter_update(self.variables.accumulator_to_node_map,
                                 a2n_map_updates[0], a2n_map_updates[1]))

    cleared_and_allocated_accumulators = array_ops.concat_v2(
        [accumulators_cleared, accumulators_allocated], 0)

    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be doubly
    # reset to 0 if they were released and not allocated, then later allocated.
    split_values = array_ops.tile(
        array_ops.expand_dims(array_ops.expand_dims(
            array_ops.zeros_like(cleared_and_allocated_accumulators,
                                 dtype=dtypes.float32), 1), 2),
        [1, self.params.num_splits_to_consider, self.params.num_output_columns])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_sums,
        cleared_and_allocated_accumulators, split_values))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.candidate_split_squares,
          cleared_and_allocated_accumulators, split_values))

    # Calculate values to put into scatter update for total counts.
    total_cleared = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(accumulators_cleared,
                                             dtype=dtypes.float32)), 1),
        [1, self.params.num_output_columns])
    total_reset = array_ops.tile(
        array_ops.expand_dims(
            array_ops.zeros_like(accumulators_allocated,
                                 dtype=dtypes.float32), 1),
        [1, self.params.num_output_columns])
    accumulator_updates = array_ops.concat_v2([total_cleared, total_reset], 0)
    updates.append(state_ops.scatter_update(
        self.variables.accumulator_sums,
        cleared_and_allocated_accumulators, accumulator_updates))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.accumulator_squares,
          cleared_and_allocated_accumulators, accumulator_updates))

    # Calculate values to put into scatter update for candidate splits.
    split_features_updates = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(
                cleared_and_allocated_accumulators)), 1),
        [1, self.params.num_splits_to_consider])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_features,
        cleared_and_allocated_accumulators, split_features_updates))

    updates += self.finish_iteration()

    return control_flow_ops.group(*updates)
Example #59
def err(labels,
        predictions,
        weights=None,
        metrics_collections=None,
        updates_collections=None,
        name=None):
    """Calculates how often `predictions` matches `labels`.

  The `accuracy` function creates two local variables, `total` and
  `count` that are used to compute the frequency with which `predictions`
  matches `labels`. This frequency is ultimately returned as `accuracy`: an
  idempotent operation that simply divides `total` by `count`.

  For estimation of the metric over a stream of data, the function creates an
  `update_op` operation that updates these variables and returns the `accuracy`.
  Internally, an `is_correct` operation computes a `Tensor` with elements 1.0
  where the corresponding elements of `predictions` and `labels` match and 0.0
  otherwise. Then `update_op` increments `total` with the reduced sum of the
  product of `weights` and `is_correct`, and it increments `count` with the
  reduced sum of `weights`.

  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

  Args:
    labels: The ground truth values, a `Tensor` whose shape matches
      `predictions`.
    predictions: The predicted values, a `Tensor` of any shape.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `labels` dimension).
    metrics_collections: An optional list of collections that the error rate
      should be added to.
    updates_collections: An optional list of collections that `update_op` should
      be added to.
    name: An optional variable_scope name.

  Returns:
    err: A `Tensor` representing the error rate, the value of `total` divided
      by `count`.
    update_op: An operation that increments the `total` and `count` variables
      appropriately and whose value matches `err`.

  Raises:
    ValueError: If `predictions` and `labels` have mismatched shapes, or if
      `weights` is not `None` and its shape doesn't match `predictions`, or if
      either `metrics_collections` or `updates_collections` are not a list or
      tuple.
    RuntimeError: If eager execution is enabled.
  """
    if context.executing_eagerly():
        raise RuntimeError('tf.metrics.err is not supported when eager '
                           'execution is enabled.')

    predictions, labels, weights = _remove_squeezable_dimensions(
        predictions=predictions, labels=labels, weights=weights)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    if labels.dtype != predictions.dtype:
        predictions = math_ops.cast(predictions, labels.dtype)
    is_incorrect = 1 - math_ops.cast(math_ops.equal(predictions, labels),
                                     dtypes.float32)
    return tf.metrics.mean(is_incorrect, weights, metrics_collections,
                           updates_collections, name or 'err')
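# Hedged usage sketch (not part of the original source): streaming error rate
# in TF1-style graph mode, assuming the internal modules above are importable.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
labels = tf.constant([1, 0, 1], dtype=tf.int64)
preds = tf.constant([1, 1, 1], dtype=tf.int64)
error_rate, update_op = err(labels, preds)
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(update_op)
    print(sess.run(error_rate))  # 1/3: one of the three predictions is wrong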
Example #60
    def _process_input_helper(self,
                              update_row_factors,
                              sp_input=None,
                              transpose_input=False,
                              row_weights=None):
        """Creates the graph for processing a sparse slice of input.

    Args:
      update_row_factors: if True, update or project the row_factors, else
        update or project the column factors.
      sp_input: Please refer to comments for update_row_factors,
        update_col_factors, project_row_factors, and project_col_factors for
        restrictions.
      transpose_input: If True, the input is logically transposed and then the
        corresponding rows/columns of the transposed input are updated.
      row_weights: If not None, this is the row/column weights to be used for
        the update or projection. If None, use the corresponding weights from
        the model. Note that the feature (column/row) weights will be
        determined by the model. When not None, it can either be a scalar or
        a rank-1 tensor with the same number of elements as the number of rows
        or columns to be updated/projected.

    Returns:
      A tuple consisting of the following three elements:
      new_values: New values for the row/column factors.
      update_op: An op that assigns the newly computed values to the row/column
        factors.
      loss: A tensor (scalar) that contains the normalized minibatch loss,
        corresponding to sp_input.
    """
        assert isinstance(sp_input, sparse_tensor.SparseTensor)

        if update_row_factors:
            left = self._row_factors
            right_factors = self._col_factors_cache
            row_wt = self._row_wt_cache
            col_wt = self._col_wt_cache
            total_rows = self._input_rows
            sharding_func = WALSModel._get_sharding_func(
                self._input_rows, self._num_row_shards)
            gramian = self._col_gramian_cache
        else:
            left = self._col_factors
            right_factors = self._row_factors_cache
            row_wt = self._col_wt_cache
            col_wt = self._row_wt_cache
            total_rows = self._input_cols
            sharding_func = WALSModel._get_sharding_func(
                self._input_cols, self._num_col_shards)
            gramian = self._row_gramian_cache
            transpose_input = not transpose_input

        # Note that the row indices of sp_input are based on the original full
        # input.
        # Here we reindex the rows and give them contiguous ids starting at 0.
        # We use tf.unique to achieve this reindexing. Note that this is done so
        # that the downstream kernel can assume that the input is "dense" along the
        # row dimension.
        row_ids, col_ids = array_ops.split(value=sp_input.indices,
                                           num_or_size_splits=2,
                                           axis=1)
        update_row_indices, all_row_ids = array_ops.unique(row_ids[:, 0])
        update_col_indices, all_col_ids = array_ops.unique(col_ids[:, 0])
        col_ids = array_ops.expand_dims(
            math_ops.cast(all_col_ids, dtypes.int64), 1)
        row_ids = array_ops.expand_dims(
            math_ops.cast(all_row_ids, dtypes.int64), 1)

        if transpose_input:
            update_indices = update_col_indices
            row_shape = [
                math_ops.cast(
                    array_ops.shape(update_row_indices)[0], dtypes.int64)
            ]
            gather_indices = update_row_indices
        else:
            update_indices = update_row_indices
            row_shape = [
                math_ops.cast(
                    array_ops.shape(update_col_indices)[0], dtypes.int64)
            ]
            gather_indices = update_col_indices

        num_rows = math_ops.cast(
            array_ops.shape(update_indices)[0], dtypes.int64)
        col_shape = [num_rows]
        right = embedding_ops.embedding_lookup(right_factors,
                                               gather_indices,
                                               partition_strategy="div")
        new_sp_indices = array_ops.concat([row_ids, col_ids], 1)
        new_sp_shape = (array_ops.concat([row_shape, col_shape], 0)
                        if transpose_input else array_ops.concat(
                            [col_shape, row_shape], 0))
        new_sp_input = sparse_tensor.SparseTensor(indices=new_sp_indices,
                                                  values=sp_input.values,
                                                  dense_shape=new_sp_shape)

        # Compute lhs and rhs of the normal equations
        total_lhs = (self._unobserved_weight * gramian)
        if self._regularization_matrix is not None:
            total_lhs += self._regularization_matrix
        if self._row_weights is None:
            # Special case of ALS. Use a much simpler update rule.
            total_rhs = (self._unobserved_weight *
                         sparse_ops.sparse_tensor_dense_matmul(
                             new_sp_input, right, adjoint_a=transpose_input))
            # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of
            # transposing explicitly.
            # TODO(rmlarsen): multi-thread tf.matrix_solve.
            new_left_values = array_ops.transpose(
                linalg_ops.matrix_solve(total_lhs,
                                        array_ops.transpose(total_rhs)))
        else:
            if row_weights is None:
                # TODO(yifanchen): Add special handling for single shard without using
                # embedding_lookup and perform benchmarks for those cases. Same for
                # col_weights lookup below.
                row_weights_slice = embedding_ops.embedding_lookup(
                    row_wt, update_indices, partition_strategy="div")
            else:
                num_indices = array_ops.shape(update_indices)[0]
                with ops.control_dependencies([
                        check_ops.assert_less_equal(
                            array_ops.rank(row_weights), 1)
                ]):
                    row_weights_slice = control_flow_ops.cond(
                        math_ops.equal(array_ops.rank(row_weights), 0), lambda:
                        (array_ops.ones([num_indices]) * row_weights),
                        lambda: math_ops.cast(row_weights, dtypes.float32))

            col_weights = embedding_ops.embedding_lookup(
                col_wt, gather_indices, partition_strategy="div")
            partial_lhs, total_rhs = (
                gen_factorization_ops.wals_compute_partial_lhs_and_rhs(
                    right,
                    col_weights,
                    self._unobserved_weight,
                    row_weights_slice,
                    new_sp_input.indices,
                    new_sp_input.values,
                    num_rows,
                    transpose_input,
                    name="wals_compute_partial_lhs_rhs"))
            total_lhs = array_ops.expand_dims(total_lhs, 0) + partial_lhs
            total_rhs = array_ops.expand_dims(total_rhs, -1)
            new_left_values = array_ops.squeeze(
                linalg_ops.matrix_solve(total_lhs, total_rhs), [2])

        update_op_name = "row_update" if update_row_factors else "col_update"
        update_op = self.scatter_update(left,
                                        update_indices,
                                        new_left_values,
                                        sharding_func,
                                        name=update_op_name)

        # Create the loss subgraph
        loss_sp_input = (sparse_ops.sparse_transpose(new_sp_input)
                         if transpose_input else new_sp_input)
        # sp_approx is the low rank estimate of the input matrix, formed by
        # computing the product <u_i, v_j> for (i, j) in loss_sp_input.indices.
        sp_approx_vals = gen_factorization_ops.masked_matmul(
            new_left_values,
            right,
            loss_sp_input.indices,
            transpose_a=False,
            transpose_b=True)
        sp_approx = sparse_tensor.SparseTensor(loss_sp_input.indices,
                                               sp_approx_vals,
                                               loss_sp_input.dense_shape)
        sp_approx_sq = math_ops.square(sp_approx)
        sp_residual = sparse_ops.sparse_add(loss_sp_input, sp_approx * (-1))
        sp_residual_sq = math_ops.square(sp_residual)
        row_wt_mat = (constant_op.constant(0.) if self._row_weights is None
                      else array_ops.expand_dims(row_weights_slice, 1))
        col_wt_mat = (constant_op.constant(0.) if self._col_weights is None
                      else array_ops.expand_dims(col_weights, 0))
        # We return the normalized loss
        partial_row_gramian = math_ops.matmul(new_left_values,
                                              new_left_values,
                                              transpose_a=True)
        normalization_factor = total_rows / math_ops.cast(
            num_rows, dtypes.float32)
        loss = (self._unobserved_weight *
                (sparse_ops.sparse_reduce_sum(sp_residual_sq) -
                 sparse_ops.sparse_reduce_sum(sp_approx_sq) + math_ops.trace(
                     math_ops.matmul(partial_row_gramian, gramian))) +
                sparse_ops.sparse_reduce_sum(
                    row_wt_mat *
                    (sp_residual_sq * col_wt_mat))) * normalization_factor
        if self._regularization is not None:
            loss += self._regularization * (
                math_ops.trace(partial_row_gramian) * normalization_factor +
                math_ops.trace(gramian))
        return (new_left_values, update_op, loss)
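# A toy dense NumPy analogue (not part of the original source) of the
# `row_weights is None` ALS branch above: each factor update solves the
# normal equations (w_u * V^T V + reg * I) X^T = (w_u * A V)^T, mirroring
# total_lhs / total_rhs / matrix_solve in the snippet.
import numpy as np

def als_row_update(A, V, unobserved_weight, reg):
    k = V.shape[1]
    total_lhs = unobserved_weight * V.T @ V + reg * np.eye(k)  # gramian term
    total_rhs = unobserved_weight * A @ V  # dense stand-in for the sparse matmul
    # Mirrors transpose(matrix_solve(total_lhs, transpose(total_rhs))).
    return np.linalg.solve(total_lhs, total_rhs.T).T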