Example #1
def softmax_cross_entropy_with_logits(logits, labels, name=None):
    """Computes softmax cross entropy between `logits` and `labels`.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class).  For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:**  While the classes are mutually exclusive, their probabilities
  need not be.  All that is required is that each row of `labels` is
  a valid probability distribution.  If using exclusive `labels`
  (wherein one and only one class is true at a time), see
  `sparse_softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency.  Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  `logits` and `labels` must have the same shape `[batch_size, num_classes]`
  and the same dtype (either `float32` or `float64`).

  Args:
    logits: Unscaled log probabilities.
    labels: Each row `labels[i]` must be a valid probability distribution.
    name: A name for the operation (optional).

  Returns:
    A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the
    softmax cross entropy loss.
  """
    # The second output tensor contains the gradients.  We use it in
    # _CrossEntropyGrad() in nn_grad but not here.
    cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits(logits, labels, name=name)
    return cost
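
A minimal usage sketch of the wrapper above (assuming a TF 1.x graph and session; `tf.nn.softmax_cross_entropy_with_logits` is the public name this wrapper is exported under):

import tensorflow as tf

# Unscaled logits for a batch of two examples over three classes.
logits = tf.constant([[2.0, 1.0, 0.1],
                      [0.5, 2.5, 0.3]])
# Each row of `labels` is a valid probability distribution (one-hot here).
labels = tf.constant([[1.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0]])

loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)

with tf.Session() as sess:
    print(sess.run(loss))  # 1-D tensor of length batch_size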
Example #2
def softmax_cross_entropy_with_logits(logits, labels, name=None):
    """Computes softmax cross entropy between `logits` and `labels`.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class).  For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:**  While the classes are mutually exclusive, their probabilities
  need not be.  All that is required is that each row of `labels` is
  a valid probability distribution.  If using exclusive `labels`
  (wherein one and only one class is true at a time), see
  `sparse_softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency.  Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  `logits` and `labels` must have the same shape `[batch_size, num_classes]`
  and the same dtype (either `float32` or `float64`).

  Args:
    logits: Unscaled log probabilities.
    labels: Each row `labels[i]` must be a valid probability distribution.
    name: A name for the operation (optional).

  Returns:
    A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the
    softmax cross entropy loss.
  """
    # The second output tensor contains the gradients.  We use it in
    # _CrossEntropyGrad() in nn_grad but not here.
    cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits(
        logits, labels, name=name)
    return cost
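
The NOTE in the docstring points to `sparse_softmax_cross_entropy_with_logits` for exclusive labels. As a sketch (assuming a TF 1.x session), the two calls below should produce the same per-example losses when the dense labels are exactly the one-hot encoding of the sparse class indices:

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1],
                      [0.5, 2.5, 0.3]])

# Dense labels: each row is a probability distribution (one-hot here).
dense_loss = tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=tf.constant([[1.0, 0.0, 0.0],
                                       [0.0, 1.0, 0.0]]))

# Sparse labels: one class index per example.
sparse_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=tf.constant([0, 1]))

with tf.Session() as sess:
    print(sess.run([dense_loss, sparse_loss]))  # both vectors should match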
Example #3
 def _testXent(self, np_features, np_labels, use_gpu=False):
     np_loss, np_backprop = self._npXent(np_features, np_labels)
     with self.test_session(use_gpu=use_gpu) as sess:
         loss, backprop = gen_nn_ops._softmax_cross_entropy_with_logits(np_features, np_labels)
         tf_loss, tf_backprop = sess.run([loss, backprop])
     self.assertAllCloseAccordingToType(np_loss, tf_loss)
     self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
Example #4
 def _testXent(self, np_features, np_labels, use_gpu=False):
   np_loss, np_backprop = self._npXent(np_features, np_labels)
   with self.test_session(use_gpu=use_gpu) as sess:
     loss, backprop = gen_nn_ops._softmax_cross_entropy_with_logits(
         logits=np_features, labels=np_labels)
     tf_loss, tf_backprop = sess.run([loss, backprop])
   self.assertAllCloseAccordingToType(np_loss, tf_loss)
   self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
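
The two tests above compare the op against a NumPy reference, `self._npXent`, whose body is not shown here. A plausible stand-alone sketch of such a reference (the name `np_xent` and its exact formulation are assumptions, not the test's actual helper):

import numpy as np

def np_xent(features, labels):
    """NumPy reference: per-row softmax cross entropy and its gradient."""
    # Numerically stable softmax over the last dimension.
    shifted = features - np.max(features, axis=-1, keepdims=True)
    exp = np.exp(shifted)
    softmax = exp / np.sum(exp, axis=-1, keepdims=True)
    # Per-row cross entropy; the gradient matches the op's second output.
    loss = -np.sum(labels * np.log(softmax), axis=-1)
    backprop = softmax - labels
    return loss, backprop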
Example #5
 def _testSingleClass(self, use_gpu=False):
   for dtype in np.float16, np.float32:
     with self.test_session(use_gpu=use_gpu) as sess:
       loss, backprop = gen_nn_ops._softmax_cross_entropy_with_logits(
           np.array([[1.], [-1.], [0.]]).astype(dtype),
           np.array([[-1.], [0.], [1.]]).astype(dtype))
       tf_loss, tf_backprop = sess.run([loss, backprop])
     self.assertAllClose([0.0, 0.0, 0.0], tf_loss)
     self.assertAllClose([[2.0], [1.0], [0.0]], tf_backprop)
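
With a single class, the softmax of any logit is exactly 1, so the per-row loss -label * log(1) is 0 regardless of the label, and the op's second output (softmax - label) reduces to 1 - label, giving the expected [[2.0], [1.0], [0.0]]. The hypothetical np_xent sketch above reproduces this:

import numpy as np

features = np.array([[1.], [-1.], [0.]], dtype=np.float32)
labels = np.array([[-1.], [0.], [1.]], dtype=np.float32)
loss, backprop = np_xent(features, labels)  # np_xent from the sketch above
print(loss)      # [0. 0. 0.]
print(backprop)  # [[2.], [1.], [0.]], one row per example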
Example #6
 def testNotMatrix(self):
   with self.test_session():
     with self.assertRaises(ValueError):
       gen_nn_ops._softmax_cross_entropy_with_logits(logits=[0., 1., 2., 3.],
                                                     labels=[0., 1., 0., 1.])
Example #7
 def testShapeMismatch(self):
   with self.test_session():
     with self.assertRaises(ValueError):
       gen_nn_ops._softmax_cross_entropy_with_logits(
           logits=[[0., 1.], [2., 3.]], labels=[[0., 1., 0.], [1., 0., 0.]])
Example #8
 def testNotMatrix(self):
   with self.test_session():
     with self.assertRaises(ValueError):
       gen_nn_ops._softmax_cross_entropy_with_logits([0., 1., 2., 3.],
                                                     [0., 1., 0., 1.])
Example #9
 def testShapeMismatch(self):
   with self.test_session():
     with self.assertRaises(ValueError):
       gen_nn_ops._softmax_cross_entropy_with_logits(
           [[0., 1.], [2., 3.]], [[0., 1., 0.], [1., 0., 0.]])
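
These shape tests reflect the low-level op's requirement that both inputs be rank-2 matrices with the same `[batch_size, num_classes]` shape. A sketch of how a caller might satisfy that for a single example, reshaping 1-D vectors into a batch of one (the values here are illustrative):

import tensorflow as tf

logits_1d = tf.constant([0., 1., 2., 3.])
labels_1d = tf.constant([0., 0., 0., 1.])  # a valid one-hot distribution

# Add a batch dimension so both tensors have shape [1, num_classes].
loss = tf.nn.softmax_cross_entropy_with_logits(
    logits=tf.reshape(logits_1d, [1, -1]),
    labels=tf.reshape(labels_1d, [1, -1]))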
Example #10
def softmax_cross_entropy_with_logits_v2(
        _sentinel=None,  # pylint: disable=invalid-name
        labels=None,
        logits=None,
        dim=-1,
        name=None):
    """Computes softmax cross entropy between `logits` and `labels`.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class).  For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:**  While the classes are mutually exclusive, their probabilities
  need not be.  All that is required is that each row of `labels` is
  a valid probability distribution.  If they are not, the computation of the
  gradient will be incorrect.

  If using exclusive `labels` (wherein one and only
  one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency.  Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  `logits` and `labels` must have the same shape, e.g.
  `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`,
  or `float64`).

  Backpropagation will happen into both `logits` and `labels`.  To disallow
  backpropagation into `labels`, pass label tensors through `tf.stop_gradient`
  before feeding them to this function.

  **Note that to avoid confusion, it is required to pass only named arguments to
  this function.**

  Args:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    labels: Each row `labels[i]` must be a valid probability distribution.
    logits: Unscaled log probabilities.
    dim: The class dimension. Defaults to -1, which is the last dimension.
    name: A name for the operation (optional).

  Returns:
    A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the
    softmax cross entropy loss.
  """
    _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, labels,
                      logits)

    # TODO(pcmurray) Raise an error when the labels do not sum to 1. Note: This
    # could break users who call this with bad labels, but disregard the bad
    # results.

    with ops.name_scope(name, "softmax_cross_entropy_with_logits",
                        [logits, labels]) as name:
        logits = ops.convert_to_tensor(logits, name="logits")
        labels = ops.convert_to_tensor(labels, name="labels")
        precise_logits = math_ops.cast(logits, dtypes.float32) if (
            logits.dtype == dtypes.float16) else logits
        # labels and logits must be of the same type
        labels = math_ops.cast(labels, precise_logits.dtype)
        input_rank = array_ops.rank(precise_logits)
        # For shape inference.
        shape = logits.get_shape()

        # Move the dim to the end if dim is not the last dimension.
        if dim != -1:

            def _move_dim_to_end(tensor, dim_index, rank):
                return array_ops.transpose(
                    tensor,
                    array_ops.concat([
                        math_ops.range(dim_index),
                        math_ops.range(dim_index + 1, rank), [dim_index]
                    ], 0))

            precise_logits = _move_dim_to_end(precise_logits, dim, input_rank)
            labels = _move_dim_to_end(labels, dim, input_rank)

        input_shape = array_ops.shape(precise_logits)

        # Make precise_logits and labels into matrices.
        precise_logits = _flatten_outer_dims(precise_logits)
        labels = _flatten_outer_dims(labels)

        # Do the actual op computation.
        # The second output tensor contains the gradients.  We use it in
        # _CrossEntropyGrad() in nn_grad but not here.
        cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits(
            precise_logits, labels, name=name)

        # The output cost shape should be the input minus dim.
        output_shape = array_ops.slice(input_shape, [0],
                                       [math_ops.subtract(input_rank, 1)])
        cost = array_ops.reshape(cost, output_shape)

        # Make shape inference work since reshape and transpose may erase its static
        # shape.
        if context.in_graph_mode(
        ) and shape is not None and shape.dims is not None:
            shape = shape.as_list()
            del shape[dim]
            cost.set_shape(shape)

        if logits.dtype == dtypes.float16:
            return math_ops.cast(cost, dtypes.float16)
        else:
            return cost
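
A closing usage sketch for the v2 variant (assuming a TF 1.5-era build where it is exposed as `tf.nn.softmax_cross_entropy_with_logits_v2`): it passes only named arguments as the docstring requires, stops gradients from flowing into soft labels, and passes `dim=-1` explicitly (the default, selecting the last axis as the class dimension):

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1],
                      [0.5, 2.5, 0.3]])
# Soft labels; stop_gradient keeps backprop out of them, as the docstring advises.
soft_labels = tf.stop_gradient(tf.constant([[0.8, 0.1, 0.1],
                                            [0.1, 0.8, 0.1]]))

loss = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=soft_labels, logits=logits, dim=-1)

with tf.Session() as sess:
    print(sess.run(loss))  # shape [2]: one loss per example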