Example #1
File: weighted.py  Project: brookisme/tfbox
def pixel_weighted_categorical_crossentropy(weights,
                                            target,
                                            output,
                                            from_logits=False,
                                            axis=-1):
    """ pixel weighted version of tf.keras.backend.categorical_crossentropy

    copy of https://github.com/tensorflow/tensorflow/blob/v2.3.1/tensorflow/python/keras/backend.py#L4640-L4708
    except for last line where weights are introduced
    """
    target = ops.convert_to_tensor_v2(target)
    output = ops.convert_to_tensor_v2(output)
    target.shape.assert_is_compatible_with(output.shape)
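    # Note: the pixel weights are applied only in the final probability-based
    # branch below; the two logits fast paths return the standard unweighted
    # cross-entropy, mirroring the upstream TensorFlow code this is copied from.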
    if from_logits:
        return nn.softmax_cross_entropy_with_logits_v2(labels=target,
                                                       logits=output,
                                                       axis=axis)
    if (not isinstance(output, (ops.EagerTensor, variables_module.Variable))
            and output.op.type
            == 'Softmax') and not hasattr(output, '_keras_history'):
        assert len(output.op.inputs) == 1
        output = output.op.inputs[0]
        return nn.softmax_cross_entropy_with_logits_v2(labels=target,
                                                       logits=output,
                                                       axis=axis)
    output = output / math_ops.reduce_sum(output, axis, True)
    epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype)
    output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_)
    return -math_ops.reduce_sum(weights * target * math_ops.log(output), axis)
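A minimal usage sketch of the same weighting using only public TensorFlow ops. The helper name pixel_weighted_cce_public and the tensor shapes are illustrative assumptions, not part of the project above; the per-pixel weights are expected to broadcast against the one-hot target (e.g. shape [batch, H, W, 1] against [batch, H, W, num_classes]).

import tensorflow as tf

def pixel_weighted_cce_public(weights, target, output, axis=-1):
    # Renormalize predicted probabilities along the class axis.
    output = output / tf.reduce_sum(output, axis=axis, keepdims=True)
    # Clip to keep log() finite.
    output = tf.clip_by_value(output, 1e-7, 1.0 - 1e-7)
    # Per-pixel weighted categorical cross-entropy.
    return -tf.reduce_sum(weights * target * tf.math.log(output), axis=axis)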
Example #2
def model_fn(features, labels, mode):
    with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("ascope", use_resource=True):
            x = array_ops.reshape(features, [-1, 4])
            x = layers.dense(inputs=x, units=10)
            x = layers.dense(inputs=x, units=3)

            if mode in [
                    model_fn_lib.ModeKeys.TRAIN, model_fn_lib.ModeKeys.EVAL
            ]:
                labels = array_ops.stop_gradient(labels)
                loss = math_ops.reduce_mean(
                    nn.softmax_cross_entropy_with_logits_v2(logits=x,
                                                            labels=labels))
            else:
                loss = None

            if mode == model_fn_lib.ModeKeys.TRAIN:
                opt = gradient_descent.GradientDescentOptimizer(0.01)
                train = opt.minimize(loss, training_util.get_global_step())
            else:
                train = None

    summary_ops.ipu_compile_summary("compile_summary", [train, loss])

    return model_fn_lib.EstimatorSpec(mode=mode,
                                      predictions=x,
                                      loss=loss,
                                      train_op=train)
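For context, a model_fn like this is normally handed to an Estimator and driven by an input_fn. A rough sketch assuming a plain tf.compat.v1 Estimator (the Graphcore IPU-specific Estimator wrapper is not shown, and train_input_fn is a hypothetical input function); running it still requires IPU hardware because of the device placement above.

import numpy as np
import tensorflow.compat.v1 as tf

def train_input_fn():
    # Hypothetical data: 4 input features, 3 one-hot classes.
    features = np.random.rand(32, 4).astype(np.float32)
    labels = np.eye(3, dtype=np.float32)[np.random.randint(0, 3, size=32)]
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return dataset.repeat().batch(8)

estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir="/tmp/ipu_model")
estimator.train(input_fn=train_input_fn, steps=10)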
Example #3
def softmax_cross_entropy(
        onehot_labels,
        logits,
        weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS):

    with ops.name_scope(scope, "softmax_cross_entropy_loss",
                        (logits, onehot_labels, weights)) as scope:
        logits = ops.convert_to_tensor(logits)
        onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
        logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

        onehot_labels = array_ops.stop_gradient(onehot_labels,
                                                name="labels_stop_gradient")
        losses = nn.softmax_cross_entropy_with_logits_v2(labels=onehot_labels,
                                                         logits=logits,
                                                         name="xentropy")
        # losses = focal_loss(
        #     labels=onehot_labels, logits=logits, alpha=500, gamma=2)

        return tf.losses.compute_weighted_loss(losses,
                                               weights,
                                               scope,
                                               loss_collection,
                                               reduction=reduction)
Example #4
def softmax_cross_entropy_v2(onehot_labels,
                             logits,
                             weights=1.0,
                             label_smoothing=0,
                             scope=None):
    from tensorflow.python.framework import ops
    from tensorflow.python.ops import math_ops, nn, array_ops
    from tensorflow.python.ops.losses.losses_impl import compute_weighted_loss, Reduction
    loss_collection = ops.GraphKeys.LOSSES
    reduction = Reduction.SUM_BY_NONZERO_WEIGHTS
    if onehot_labels is None:
        raise ValueError("onehot_labels must not be None.")
    if logits is None:
        raise ValueError("logits must not be None.")
    with ops.name_scope(scope, "softmax_cross_entropy_loss",
                        (logits, onehot_labels, weights)) as scope:
        logits = ops.convert_to_tensor(logits)
        onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
        logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

        if label_smoothing > 0:
            num_classes = math_ops.cast(
                array_ops.shape(onehot_labels)[1], logits.dtype)
            smooth_positives = 1.0 - label_smoothing
            smooth_negatives = label_smoothing / num_classes
            onehot_labels = onehot_labels * smooth_positives + smooth_negatives

        losses = nn.softmax_cross_entropy_with_logits_v2(labels=onehot_labels,
                                                         logits=logits,
                                                         name="xentropy")
        return compute_weighted_loss(losses,
                                     weights,
                                     scope,
                                     loss_collection,
                                     reduction=reduction)
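A small usage sketch in a TF1-style graph session, assuming the function above is in scope; the tensors, weights, and smoothing value are made up for illustration.

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

onehot = tf.constant([[0., 1., 0.], [1., 0., 0.]])
logits = tf.constant([[0.2, 1.5, -0.3], [2.0, 0.1, 0.4]])
# Per-sample weights of shape [batch_size]; label_smoothing softens the targets.
loss = softmax_cross_entropy_v2(onehot, logits, weights=[0.3, 0.7], label_smoothing=0.1)

with tf.Session() as sess:
    print(sess.run(loss))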
Example #5
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
    logits: `[batch_size, num_classes]` logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or rank 1 and is
      broadcastable to the loss which is a `Tensor` of shape `[batch_size]`.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.  Also if
      `onehot_labels` or `logits` is None.
  """
  if onehot_labels is None:
    raise ValueError("onehot_labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    onehot_labels = array_ops.stop_gradient(
        onehot_labels, name="labels_stop_gradient")
    losses = nn.softmax_cross_entropy_with_logits_v2(
        labels=onehot_labels, logits=logits, name="xentropy")

    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
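To make the label-smoothing step concrete, here is the arithmetic for a single row with made-up numbers (3 classes, label_smoothing = 0.2):

import numpy as np

onehot = np.array([0., 1., 0.])
label_smoothing = 0.2
num_classes = onehot.shape[0]
# smooth_positives = 1.0 - 0.2 = 0.8; smooth_negatives = 0.2 / 3 ≈ 0.0667
smoothed = onehot * (1.0 - label_smoothing) + label_smoothing / num_classes
print(smoothed)  # ≈ [0.0667, 0.8667, 0.0667]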
Example #6
  def testMatMulFwdBackwd(self):
    with self.session() as sess:

      cfg = ipu.utils.create_ipu_config(profiling=True)
      cfg = ipu.utils.set_ipu_model_options(cfg, compile_ipu_code=False)
      cfg = ipu.utils.auto_select_ipus(cfg, 1)
      ipu.utils.configure_ipu_system(cfg)

      with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("vs", use_resource=True):
          w1 = variable_scope.get_variable(
              "w1",
              shape=[4, 3],
              dtype=np.float32,
              initializer=init_ops.constant_initializer(
                  np.array([[1, 2, 1], [1, 3, 4], [1, 5, 6], [1, 7, 8]],
                           dtype=np.float32)))
          b1 = variable_scope.get_variable(
              "b1",
              shape=[3],
              dtype=np.float32,
              initializer=init_ops.constant_initializer(
                  np.array([2, 1, 1], dtype=np.float32)))
          w2 = variable_scope.get_variable(
              "w2",
              shape=[3, 2],
              dtype=np.float32,
              initializer=init_ops.constant_initializer(
                  np.array([[3, 4], [5, 6], [7, 8]], dtype=np.float32)))
          b2 = variable_scope.get_variable(
              "b2",
              shape=[2],
              dtype=np.float32,
              initializer=init_ops.constant_initializer(
                  np.array([2, 1], dtype=np.float32)))

        x = array_ops.placeholder(np.float32, shape=[3, 4])
        y = math_ops.matmul(x, w1) + b1
        y = math_ops.matmul(y, w2) + b2

        expected = array_ops.placeholder(np.float32, shape=[3, 2])
        xent = nn.softmax_cross_entropy_with_logits_v2(
            logits=y, labels=array_ops.stop_gradient(expected))

        optimizer = gradient_descent.GradientDescentOptimizer(0.1)
        train = optimizer.minimize(xent)

        fd = {
            x:
            np.array([[7, 3, 5, 9], [1, 2, 3, 4], [5, 6, 7, 8]],
                     dtype=np.float32),
            expected: [[1, 2], [3, 4], [5, 6]]
        }

        sess.run(variables.global_variables_initializer())
        sess.run(train, feed_dict=fd)
Example #7
 def my_net(X, Y):
   # Forward pass
   logits = RNN(X)
   # Loss
   cross_entropy = math_ops.reduce_mean(
       nn.softmax_cross_entropy_with_logits_v2(
           logits=logits, labels=array_ops.stop_gradient(Y)))
   # Training
   train = gradient_descent.GradientDescentOptimizer(0.01).minimize(
       cross_entropy)
   return [cross_entropy, train]
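A sketch of how a function like my_net is typically compiled and run on the IPU, assuming Graphcore's ipu_compiler.compile wrapper and the helper imports (array_ops, dtypes, session_lib, variables, np) used elsewhere in these examples; the placeholder shapes and the RNN helper are assumptions, not part of the snippet above.

from tensorflow.python import ipu

# Hypothetical shapes: 5 time steps, batch of 2, 8 features, 3 classes.
X = array_ops.placeholder(dtypes.float32, [5, 2, 8])
Y = array_ops.placeholder(dtypes.float32, [2, 3])

with ipu.scopes.ipu_scope("/device:IPU:0"):
    run_ops = ipu.ipu_compiler.compile(my_net, inputs=[X, Y])

with session_lib.Session() as sess:
    sess.run(variables.global_variables_initializer())
    loss_value, _ = sess.run(run_ops, feed_dict={X: np.zeros([5, 2, 8], np.float32),
                                                 Y: np.zeros([2, 3], np.float32)})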
Example #8
def _PopnnGRU(x, initial_state, y):
  gru_cell = ipu.ops.rnn_ops.PopnnGRU(
      num_hidden,
      dtype=dataType,
      weights_initializer=init_ops.zeros_initializer(dtype=dataType),
      bias_initializer=init_ops.zeros_initializer(dtype=dataType))
  outputs, _ = gru_cell(x, initial_state=initial_state, training=True)
  softmax = nn.softmax_cross_entropy_with_logits_v2(
      logits=outputs[-1], labels=array_ops.stop_gradient(y))
  loss = math_ops.reduce_mean(softmax)
  train = gradient_descent.GradientDescentOptimizer(lr).minimize(loss)
  return [loss, train]
Example #9
def _PopnnLSTM(x, h, c, y):
    lstm_cell = popnn_rnn.PopnnLSTM(
        num_hidden,
        dtype=dataType,
        weights_initializer=init_ops.zeros_initializer(dtype=dataType),
        bias_initializer=init_ops.zeros_initializer(dtype=dataType))
    state = rnn_cell.LSTMStateTuple(c, h)
    outputs, _ = lstm_cell(x, initial_state=state, training=True)
    softmax = nn.softmax_cross_entropy_with_logits_v2(
        logits=outputs[-1], labels=array_ops.stop_gradient(y))
    loss = math_ops.reduce_mean(softmax)
    train = gradient_descent.GradientDescentOptimizer(lr).minimize(loss)
    return [loss, train]
Example #10
    def testMatMulFwdBackwdLeftHandWeights(self):
        with ops.device("/device:IPU:0"):
            with variable_scope.variable_scope("vs", use_resource=True):
                w1 = variable_scope.get_variable(
                    "w1",
                    shape=[3, 4],
                    dtype=np.float32,
                    initializer=init_ops.constant_initializer(
                        np.array([[1, 2, 1, 1], [3, 4, 1, 5], [6, 1, 7, 8]],
                                 dtype=np.float32)))
                b1 = variable_scope.get_variable(
                    "b1",
                    shape=[3],
                    dtype=np.float32,
                    initializer=init_ops.constant_initializer(
                        np.array([2, 1, 1], dtype=np.float32)))
                w2 = variable_scope.get_variable(
                    "w2",
                    shape=[2, 3],
                    dtype=np.float32,
                    initializer=init_ops.constant_initializer(
                        np.array([[3, 4, 5], [6, 7, 8]], dtype=np.float32)))
                b2 = variable_scope.get_variable(
                    "b2",
                    shape=[3],
                    dtype=np.float32,
                    initializer=init_ops.constant_initializer(
                        np.array([2, 1, 1], dtype=np.float32)))

            x = array_ops.placeholder(np.float32, shape=[4, 3])
            y = math_ops.matmul(w1, x) + b1
            y = math_ops.matmul(w2, y) + b2

            expected = array_ops.placeholder(np.float32, shape=[2, 3])
            xent = nn.softmax_cross_entropy_with_logits_v2(
                logits=y, labels=array_ops.stop_gradient(expected))

            optimizer = gradient_descent.GradientDescentOptimizer(0.1)
            train = optimizer.minimize(xent)

        with session_lib.Session() as sess:
            fd = {
                x:
                np.array([[7, 3, 5], [1, 2, 3], [5, 6, 7], [3, 5, 2]],
                         dtype=np.float32),
                expected: [[1, 2, 1], [3, 4, 3]]
            }

            sess.run(variables.global_variables_initializer())
            sess.run(train, feed_dict=fd)
Example #11
    def _RunLSTMLayerTraining(self, name, input_value, forget_bias,
                              weights_value, h_value, c_value, training_steps,
                              labels_array, lstm_layer_function,
                              device_string):
        pinputs = array_ops.placeholder(dataType,
                                        [seq_len, batch_size, input_size],
                                        name="inputs")
        plabels = array_ops.placeholder(dataType,
                                        [seq_len, batch_size, num_channels],
                                        name="labels")

        with ops.device(device_string):
            with variable_scope.variable_scope("lstm_layer",
                                               use_resource=True):
                initial_h_state = _get_variable(
                    "initial_h_state",
                    shape=[batch_size, num_channels],
                    initializer=init_ops.constant_initializer(
                        h_value, dataType))
                initial_c_state = _get_variable(
                    "initial_c_state",
                    shape=[batch_size, num_channels],
                    initializer=init_ops.constant_initializer(
                        c_value, dataType))
            logits = lstm_layer_function(inputs=pinputs,
                                         weights_value=weights_value,
                                         initial_state=(initial_h_state,
                                                        initial_c_state),
                                         forget_bias=forget_bias,
                                         training=True,
                                         name=name)
            softmax = nn.softmax_cross_entropy_with_logits_v2(
                logits=logits, labels=array_ops.stop_gradient(plabels))
            loss = math_ops.reduce_mean(softmax)
            train = gradient_descent.GradientDescentOptimizer(0.01).minimize(
                loss)

        with session_lib.Session() as sess:
            sess.run(variables.global_variables_initializer())
            losses = []
            inputs = _createLSTMInput(input_value, batch_size, seq_len,
                                      input_size)
            fd = {
                pinputs: inputs,
                plabels: labels_array,
            }
            for _ in range(0, training_steps):
                l, _ = sess.run([loss, train], fd)
                losses.append(l)
            return losses
Example #12
        def my_model(loss, x, y):
          with ipu.scopes.ipu_scope("/device:IPU:0"):
            lstm_cell = rnn_cell.LSTMCell(128)
            x, _ = rnn.dynamic_rnn(cell=lstm_cell,
                                   inputs=x,
                                   dtype=dtypes.float32,
                                   time_major=True)

            cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
                logits=x, labels=array_ops.stop_gradient(y))
            loss = math_ops.reduce_mean(cross_entropy)

            optim = gradient_descent.GradientDescentOptimizer(0.01)
            train = optim.minimize(cross_entropy)

            return [loss, train]
Example #13
def _tfGRU(x, initial_state, y):
  gru_cell = rnn_cell.GRUCell(
      num_hidden,
      name='gru_cell',
      kernel_initializer=init_ops.zeros_initializer(dtype=dataType),
      bias_initializer=init_ops.zeros_initializer(dtype=dataType))
  outputs, _ = rnn.dynamic_rnn(gru_cell,
                               x,
                               dtype=dataType,
                               initial_state=initial_state,
                               time_major=True)
  softmax = nn.softmax_cross_entropy_with_logits_v2(
      logits=outputs[-1], labels=array_ops.stop_gradient(y))
  loss = math_ops.reduce_mean(softmax)
  train = gradient_descent.GradientDescentOptimizer(lr).minimize(loss)
  return [loss, train]
Example #14
def _tfLSTM(x, h, c, y):
    lstm_cell = rnn_cell.LSTMCell(
        num_hidden,
        name='basic_lstm_cell',
        forget_bias=0.,
        initializer=init_ops.zeros_initializer(dtype=dataType))
    state = rnn_cell.LSTMStateTuple(c, h)
    outputs, _ = rnn.dynamic_rnn(lstm_cell,
                                 x,
                                 dtype=dataType,
                                 initial_state=state,
                                 time_major=True)
    softmax = nn.softmax_cross_entropy_with_logits_v2(
        logits=outputs[-1], labels=array_ops.stop_gradient(y))
    loss = math_ops.reduce_mean(softmax)
    train = gradient_descent.GradientDescentOptimizer(lr).minimize(loss)
    return [loss, train]
Example #15
        def my_graph(inp, lab):
            with ops.device("/device:IPU:0"):
                with ipu.ops.ipu_shard(0):
                    x = layers.Conv2D(8, 3, padding='same', name="convA")(inp)

                with ipu.ops.ipu_shard(1):
                    x = layers.Conv2D(8, 1, padding='same', name="convB")(x)
                    x = math_ops.reduce_mean(x, axis=[1, 2])

                    loss = nn.softmax_cross_entropy_with_logits_v2(
                        logits=x, labels=array_ops.stop_gradient(lab))
                    loss = math_ops.reduce_mean(loss)

                opt = ipu.sharded_optimizer.ShardedOptimizer(
                    gradient_descent.GradientDescentOptimizer(0.000001))
                train = opt.minimize(loss)

            return [loss, train]
Example #16
            def my_model(lr, loss, x, y):
                with ipu.ops.ipu_scope("/device:IPU:0"):
                    inp = x

                    x = layers.Conv2D(8,
                                      3,
                                      padding='same',
                                      name="conv1",
                                      use_bias=False)(x)
                    x = math_ops.reduce_max(x, axis=[1, 2])

                    cross_entropy = nn.softmax_cross_entropy_with_logits_v2(
                        logits=x, labels=array_ops.stop_gradient(y))
                    loss = math_ops.reduce_mean(cross_entropy)

                    optim = so.ShardedOptimizer(
                        gd.GradientDescentOptimizer(lr))
                    train = optim.minimize(cross_entropy)

                    autoshard.automatic_sharding(2, inp, loss)

                    return [lr, loss, train]
Example #17
def softmax_cross_entropy(onehot_labels,
                          logits,
                          weights=1.0,
                          label_smoothing=0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES,
                          reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    r"""Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits_v2.
    `weights` acts as a coefficient for the loss. If a scalar is provided,
    then the loss is simply scaled by the given value. If `weights` is a
    tensor of shape `[batch_size]`, then the loss weights apply to each
    corresponding sample.
    If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
        new_onehot_labels = onehot_labels * (1 - label_smoothing)
                            + label_smoothing / num_classes
    Note that `onehot_labels` and `logits` must have the same shape,
    e.g. `[batch_size, num_classes]`. The shape of `weights` must be
    broadcastable to loss, whose shape is decided by the shape of `logits`.
    In case the shape of `logits` is `[batch_size, num_classes]`, loss is
    a `Tensor` of shape `[batch_size]`.
    Args:
      onehot_labels: One-hot-encoded labels.
      logits: Logits outputs of the network.
      weights: Optional `Tensor` that is broadcastable to loss.
      label_smoothing: If greater than 0 then smooth the labels.
      scope: the scope for the operations performed in computing the loss.
      loss_collection: collection to which the loss will be added.
      reduction: Type of reduction to apply to loss.
    Returns:
      Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
      `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.
    Raises:
      ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
        or if the shape of `weights` is invalid or if `weights` is None.  Also if
        `onehot_labels` or `logits` is None.
    @compatibility(TF2)
    `tf.compat.v1.losses.softmax_cross_entropy` is mostly compatible with eager
    execution and `tf.function`. But, the `loss_collection` argument is
    ignored when executing eagerly and no loss will be written to the loss
    collections. You will need to either hold on to the return value manually
    or rely on `tf.keras.Model` loss tracking.
    To switch to native TF2 style, instantiate the
     `tf.keras.losses.CategoricalCrossentropy` class with `from_logits` set
    as `True` and call the object instead.
    #### Structural Mapping to Native TF2
    Before:
    ```python
    loss = tf.compat.v1.losses.softmax_cross_entropy(
      onehot_labels=onehot_labels,
      logits=logits,
      weights=weights,
      label_smoothing=smoothing)
    ```
    After:
    ```python
    loss_fn = tf.keras.losses.CategoricalCrossentropy(
      from_logits=True,
      label_smoothing=smoothing)
    loss = loss_fn(
      y_true=onehot_labels,
      y_pred=logits,
      sample_weight=weights)
    ```
    #### How to Map Arguments
    | TF1 Arg Name      | TF2 Arg Name      | Note                                  |
    | :---------------- | :---------------- | :------------------------------------ |
    | -                 | `from_logits`     | Set `from_logits=True` to have identical behavior. |
    | `onehot_labels`   | `y_true`          | In `__call__()` method.               |
    | `logits`          | `y_pred`          | In `__call__()` method.               |
    | `weights`         | `sample_weight`   | In `__call__()` method.               |
    | `label_smoothing` | `label_smoothing` | In constructor.                       |
    | `scope`           | Not supported     | -                                     |
    | `loss_collection` | Not supported     | Losses should be tracked explicitly or with Keras APIs, for example [add_loss][add_loss], instead of via collections. |
    | `reduction`       | `reduction`       | In constructor. `tf.compat.v1.losses.Reduction.SUM_OVER_BATCH_SIZE`, `SUM`, and `NONE` correspond to `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE`, `SUM`, and `NONE`, respectively. Other values, including the default `tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS`, have no direct equivalent; the loss implementation must be adapted manually. |
    [add_loss]:https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#add_loss
    #### Before & After Usage Example
    Before:
    >>> y_true = [[0, 1, 0], [0, 0, 1]]
    >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
    >>> weights = [0.3, 0.7]
    >>> smoothing = 0.2
    >>> tf.compat.v1.losses.softmax_cross_entropy(y_true, y_pred, weights=weights,
    ...   label_smoothing=smoothing).numpy()
    0.57618
    After:
    >>> cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True,
    ...   label_smoothing=smoothing)
    >>> cce(y_true, y_pred, sample_weight=weights).numpy()
    0.57618
    @end_compatibility
    """

    if onehot_labels is None:
        raise ValueError("onehot_labels must not be None.")
    if logits is None:
        raise ValueError("logits must not be None.")
    with ops.name_scope(scope, "softmax_cross_entropy_loss",
                        (logits, onehot_labels, weights)) as scope:
        logits = ops.convert_to_tensor(logits)
        onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
        logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

        if label_smoothing > 0:
            num_classes = math_ops.cast(
                array_ops.shape(onehot_labels)[-1], logits.dtype)
            smooth_positives = 1.0 - label_smoothing
            smooth_negatives = label_smoothing / num_classes
            onehot_labels = onehot_labels * smooth_positives + smooth_negatives

        onehot_labels = array_ops.stop_gradient(onehot_labels,
                                                name="labels_stop_gradient")
        losses = nn.softmax_cross_entropy_with_logits_v2(labels=onehot_labels,
                                                         logits=logits,
                                                         name="xentropy")

        return compute_weighted_loss(losses,
                                     weights,
                                     scope,
                                     loss_collection,
                                     reduction=reduction)