Code example #1
    def _accumulate_tau(self, loss: Tensor, mode: str, warmup: bool) -> Tensor:
        """Determine an average loss value based on a particular method chosen during __init__.

        Right now this only supports constant values or exponential averaging. The original paper also proposed global
        averaging, but the authors didn't find much difference between the three methods, and global averaging would
        have more complicated memory requirements.

        Args:
            loss: The current step loss.
            mode: The current step mode.
            warmup: Whether running in warmup mode or not.

        Returns:
            Either the static value provided at __init__, or an exponential moving average of the loss over time.
        """
        if self.tau_method == 'exp':
            if _read_variable(self.initialized[mode]):
                _assign(
                    self.tau[mode], self.tau[mode] - 0.1 *
                    (self.tau[mode] - reduce_mean(loss)))
            else:
                _assign(self.tau[mode], reduce_mean(loss))
                if not warmup:
                    _assign(self.initialized[mode],
                            ones_like(self.initialized[mode]))
        return self.tau[mode]
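As a quick illustration of the `'exp'` branch above, the update `tau <- tau - 0.1 * (tau - mean(loss))` is an exponential moving average with smoothing factor 0.1. A minimal pure-Python sketch; the loss values are invented:

```python
# Sketch of the 'exp' branch: after initialization, tau tracks the loss
# stream via tau <- tau - 0.1 * (tau - mean(loss)). Loss values are invented.
tau = None
for loss in [2.30, 1.80, 1.10, 0.95]:
    if tau is None:
        tau = loss                      # first step: initialize tau to the loss
    else:
        tau = tau - 0.1 * (tau - loss)  # exponential moving average, factor 0.1
    print(round(tau, 4))  # 2.3, 2.25, 2.135, 2.0165
```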
Code example #2
def hinge(y_true: Tensor, y_pred: Tensor) -> Tensor:
    """Calculate the hinge loss between two tensors.

    This method can be used with TensorFlow tensors:
    ```python
    true = tf.constant([[-1,1,1,-1], [1,1,1,1], [-1,-1,1,-1], [1,-1,-1,-1]])
    pred = tf.constant([[0.1,0.9,0.05,0.05], [0.1,-0.2,0.0,-0.7], [0.0,0.15,0.8,0.05], [1.0,-1.0,-1.0,-1.0]])
    b = fe.backend.hinge(y_pred=pred, y_true=true)  # [0.8  1.2  0.85 0.  ]
    ```

    This method can be used with PyTorch tensors:
    ```python
    true = torch.tensor([[-1,1,1,-1], [1,1,1,1], [-1,-1,1,-1], [1,-1,-1,-1]])
    pred = torch.tensor([[0.1,0.9,0.05,0.05], [0.1,-0.2,0.0,-0.7], [0.0,0.15,0.8,0.05], [1.0,-1.0,-1.0,-1.0]])
    b = fe.backend.hinge(y_pred=pred, y_true=true)  # [0.8  1.2  0.85 0.  ]
    ```

    Args:
        y_true: Ground truth class labels which should take values of 1 or -1.
        y_pred: Prediction score for each class, with a shape like y_true. dtype: float32 or float16.

    Returns:
        The hinge loss between `y_true` and `y_pred`

    Raises:
        ValueError: If `y_pred` is an unacceptable data type.
    """
    y_true = cast(y_true, 'float32')
    return reduce_mean(clip_by_value(1.0 - y_true * y_pred, min_value=0), axis=-1)
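To sanity-check the documented outputs, here is a framework-free NumPy version of the same formula (a sketch, not part of fe.backend):

```python
import numpy as np

# Framework-free check of the hinge formula above: the mean over the class
# axis of max(0, 1 - y_true * y_pred).
true = np.array([[-1, 1, 1, -1], [1, 1, 1, 1], [-1, -1, 1, -1], [1, -1, -1, -1]])
pred = np.array([[0.1, 0.9, 0.05, 0.05], [0.1, -0.2, 0.0, -0.7],
                 [0.0, 0.15, 0.8, 0.05], [1.0, -1.0, -1.0, -1.0]])
print(np.mean(np.maximum(0.0, 1.0 - true * pred), axis=-1))  # [0.8  1.2  0.85 0.  ]
```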
Code example #3
    def _loss_preprocess(
            self, loss: Union[Tensor,
                              List[Tensor]]) -> Union[Tensor, List[Tensor]]:
        """Loss preprocess for multi-GPU and mixed-precision training.

        Args:
            loss: Unprocessed loss.

        Returns:
            Processed loss.
        """
        if self.extra_loss:
            loss = loss + tf.reduce_sum(self.model.losses)
        loss = reduce_mean(loss)

        if self.framework == "tf":
            # scale up loss for mixed precision training to avoid underflow
            if self.model.mixed_precision:
                loss = self.model.current_optimizer.get_scaled_loss(loss)
            # for multi-gpu training, the gradient will be combined by sum, normalize the loss
            strategy = tf.distribute.get_strategy()
            if isinstance(strategy, tf.distribute.MirroredStrategy):
                loss = loss / strategy.num_replicas_in_sync

        else:  # self.framework == "torch"
            if self.model.current_optimizer.scaler is not None:
                # scale up loss for mixed precision training to avoid underflow
                loss = self.model.current_optimizer.scaler.scale(loss)

        return loss
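The division by `num_replicas_in_sync` deserves a note: under `MirroredStrategy`, per-replica gradients are combined by summation, so dividing each replica's loss by the replica count makes the summed gradient equal to the global mean. A toy illustration in plain Python, with invented numbers:

```python
# Toy illustration of the replica normalization above: if N replicas each
# compute a gradient and the results are summed, dividing every per-replica
# loss by N recovers the mean gradient. Numbers are invented.
num_replicas = 4
per_replica_grads = [0.2, 0.4, 0.1, 0.3]  # d(loss_i)/dw on each device
summed = sum(g / num_replicas for g in per_replica_grads)
print(summed)  # 0.25 == mean of the per-replica gradients
```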
Code example #4
def update_model(model: Union[tf.keras.Model, torch.nn.Module],
                 loss: Union[tf.Tensor, torch.Tensor],
                 tape: Optional[tf.GradientTape] = None,
                 retain_graph: bool = True):
    """Update `model` weights based on a given `loss`.

    This method can be used with TensorFlow models:
    ```python
    m = fe.build(fe.architecture.tensorflow.LeNet, optimizer_fn="adam")
    x = tf.ones((3,28,28,1))  # (batch, height, width, channels)
    y = tf.constant((1, 0, 1))
    with tf.GradientTape(persistent=True) as tape:
        pred = fe.backend.feed_forward(m, x)  # [[~0.5, ~0.5], [~0.5, ~0.5], [~0.5, ~0.5]]
        loss = fe.backend.sparse_categorical_crossentropy(y_pred=pred, y_true=y)  # ~2.3
        fe.backend.update_model(m, loss=loss, tape=tape)
    ```

    This method can be used with PyTorch models:
    ```python
    m = fe.build(fe.architecture.pytorch.LeNet, optimizer_fn="adam")
    x = torch.ones((3,1,28,28))  # (batch, channels, height, width)
    y = torch.tensor((1, 0, 1))
    pred = fe.backend.feed_forward(m, x)  # [[~0.5, ~0.5], [~0.5, ~0.5], [~0.5, ~0.5]]
    loss = fe.backend.sparse_categorical_crossentropy(y_pred=pred, y_true=y)  # ~2.3
    fe.backend.update_model(m, loss=loss)
    ```

    Args:
        model: A neural network instance to update.
        loss: A loss value to compute gradients from.
        tape: A TensorFlow GradientTape which was recording when the `loss` was computed (iff using TensorFlow).
        retain_graph: Whether to keep the model graph in memory (applicable only for PyTorch).

    Raises:
        ValueError: If `model` is an unacceptable data type.
    """
    loss = reduce_mean(loss)
    if isinstance(model, tf.keras.Model):
        # scale up loss for mixed precision training to avoid underflow
        if isinstance(model.current_optimizer, mixed_precision.LossScaleOptimizer):
            loss = model.current_optimizer.get_scaled_loss(loss)
        # for multi-gpu training, the gradient will be combined by sum, normalize the loss
        strategy = tf.distribute.get_strategy()
        if isinstance(strategy, tf.distribute.MirroredStrategy):
            loss = loss / strategy.num_replicas_in_sync
        gradients = get_gradient(loss, model.trainable_variables, tape=tape)
        with tape.stop_recording():
            # scale down gradient to balance scale-up loss
            if isinstance(model.current_optimizer, mixed_precision.LossScaleOptimizer):
                gradients = model.current_optimizer.get_unscaled_gradients(gradients)
            model.current_optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    elif isinstance(model, torch.nn.Module):
        gradients = get_gradient(loss, model.parameters(), retain_graph=retain_graph)
        for gradient, parameter in zip(gradients, model.parameters()):
            parameter.grad = gradient
        model.current_optimizer.step()
    else:
        raise ValueError("Unrecognized model instance {}".format(type(model)))
Code example #5
def categorical_crossentropy(y_pred: Tensor, y_true: Tensor, from_logits: bool = False,
                             average_loss: bool = True) -> Tensor:
    """Compute categorical crossentropy.

    Note that if any of the `y_pred` values are exactly 0, this will result in a NaN output. If `from_logits` is
    False, then each entry of `y_pred` should sum to 1. If they don't sum to 1, the tf and torch backends will
    produce different numerical values.

    This method can be used with TensorFlow tensors:
    ```python
    true = tf.constant([[0, 1, 0], [1, 0, 0], [0, 0, 1]])
    pred = tf.constant([[0.1, 0.8, 0.1], [0.9, 0.05, 0.05], [0.1, 0.2, 0.7]])
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true)  # 0.228
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true, average_loss=False)  # [0.223, 0.105, 0.356]
    ```

    This method can be used with PyTorch tensors:
    ```python
    true = torch.tensor([[0, 1, 0], [1, 0, 0], [0, 0, 1]])
    pred = torch.tensor([[0.1, 0.8, 0.1], [0.9, 0.05, 0.05], [0.1, 0.2, 0.7]])
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true)  # 0.228
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true, average_loss=False)  # [0.223, 0.105, 0.356]
    ```

    Args:
        y_pred: Prediction with a shape like (Batch, C). dtype: float32 or float16.
        y_true: Ground truth class labels with a shape like `y_pred`. dtype: int or float32 or float16.
        from_logits: Whether y_pred is from logits. If True, a softmax will be applied to the prediction.
        average_loss: Whether to average the element-wise loss.

    Returns:
        The categorical crossentropy between `y_pred` and `y_true`. A scalar if `average_loss` is True, else a
        tensor with the shape (Batch).

    Raises:
        AssertionError: If `y_true` or `y_pred` are unacceptable data types.
    """
    assert type(y_pred) == type(y_true), "y_pred and y_true must be same tensor type"
    assert isinstance(y_pred, (tf.Tensor, torch.Tensor)), "only support tf.Tensor or torch.Tensor as y_pred"
    assert isinstance(y_true, (tf.Tensor, torch.Tensor)), "only support tf.Tensor or torch.Tensor as y_true"
    if tf.is_tensor(y_pred):
        ce = tf.losses.categorical_crossentropy(y_pred=y_pred, y_true=y_true, from_logits=from_logits)
    else:
        y_true = y_true.to(torch.float)
        ce = _categorical_crossentropy_torch(y_pred=y_pred, y_true=y_true, from_logits=from_logits)
    if average_loss:
        ce = reduce_mean(ce)
    return ce
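The documented numbers can be verified by hand: with one-hot labels, categorical crossentropy reduces to -log of the predicted probability of the true class. A NumPy check (values match the docstring up to rounding):

```python
import numpy as np

# Hand-check of the documented values: with one-hot labels, categorical
# crossentropy is -log of the predicted true-class probability.
true = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]], dtype=np.float32)
pred = np.array([[0.1, 0.8, 0.1], [0.9, 0.05, 0.05], [0.1, 0.2, 0.7]], dtype=np.float32)
per_sample = -np.sum(true * np.log(pred), axis=-1)
print(per_sample)         # ~[0.223 0.105 0.357] (average_loss=False)
print(per_sample.mean())  # ~0.228 (average_loss=True)
```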
Code example #6
def mean_squared_error(y_true: Tensor, y_pred: Tensor) -> Tensor:
    """Calculate mean squared error between two tensors.

    This method can be used with TensorFlow tensors:
    ```python
    true = tf.constant([[0,1,0,0], [0,0,0,1], [0,0,1,0], [1,0,0,0]])
    pred = tf.constant([[0.1,0.9,0.05,0.05], [0.1,0.2,0.0,0.7], [0.0,0.15,0.8,0.05], [1.0,0.0,0.0,0.0]])
    b = fe.backend.mean_squared_error(y_pred=pred, y_true=true)  # [0.0063, 0.035, 0.016, 0.0]
    true = tf.constant([[1], [3], [2], [0]])
    pred = tf.constant([[2.0], [0.0], [2.0], [1.0]])
    b = fe.backend.mean_squared_error(y_pred=pred, y_true=true)  # [1.0, 9.0, 0.0, 1.0]
    ```

    This method can be used with PyTorch tensors:
    ```python
    true = torch.tensor([[0,1,0,0], [0,0,0,1], [0,0,1,0], [1,0,0,0]])
    pred = torch.tensor([[0.1,0.9,0.05,0.05], [0.1,0.2,0.0,0.7], [0.0,0.15,0.8,0.05], [1.0,0.0,0.0,0.0]])
    b = fe.backend.mean_squared_error(y_pred=pred, y_true=true)  # [0.0063, 0.035, 0.016, 0.0]
    true = torch.tensor([[1], [3], [2], [0]])
    pred = torch.tensor([[2.0], [0.0], [2.0], [1.0]])
    b = fe.backend.mean_squared_error(y_pred=pred, y_true=true)  # [1.0, 9.0, 0.0, 1.0]
    ```

    Args:
        y_true: Ground truth class labels with a shape like (batch) or (batch, n_classes). dtype: int, float16, float32.
        y_pred: Prediction score for each class, with a shape like y_true. dtype: float32 or float16.

    Returns:
        The MSE between `y_true` and `y_pred`

    Raises:
        AssertionError: If `y_true` and `y_pred` have mismatched shapes or disparate types.
        ValueError: If `y_pred` is an unacceptable data type.
    """
    assert type(y_pred) == type(y_true), "y_pred and y_true must be of the same tensor type"
    assert y_pred.shape == y_true.shape, \
        f"MSE requires y_true and y_pred to have the same shape, but found {y_true.shape} and {y_pred.shape}"
    if tf.is_tensor(y_pred):
        mse = tf.losses.MSE(y_true, y_pred)
    elif isinstance(y_pred, torch.Tensor):
        mse = reduce_mean(
            torch.nn.MSELoss(reduction="none")(y_pred, y_true), axis=list(range(1, y_pred.ndim)))
    else:
        raise ValueError("Unrecognized tensor type {}".format(type(y_pred)))
    return mse
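For reference, the first documented row can be reproduced by hand, since MSE is just the mean of squared differences over the non-batch axis:

```python
import numpy as np

# Hand-check of the first documented row: mean of squared differences.
true = np.array([0, 1, 0, 0], dtype=np.float32)
pred = np.array([0.1, 0.9, 0.05, 0.05], dtype=np.float32)
print(np.mean((true - pred) ** 2))  # 0.00625, shown rounded as 0.0063 above
```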
Code example #7
    def forward(self, data: List[Tensor],
                state: Dict[str, Any]) -> Union[Tensor, List[Tensor]]:
        base_loss = self.loss.forward(data, state)
        tau = self._accumulate_tau(base_loss, state['mode'], state['warmup'])
        beta = (base_loss - tau) / self.lam
        # TODO The authors say to remove the gradients. Need to check whether this is necessary (speed or metrics)
        ln_sigma = -lambertw(0.5 * maximum(self.cap, beta))
        super_loss = (base_loss - tau) * exp(ln_sigma) + self.lam * pow(
            ln_sigma, 2)

        if self.average_loss:
            super_loss = reduce_mean(super_loss)

        if len(self.outputs) == 2:
            # User requested that the confidence score be returned
            return [super_loss, exp(ln_sigma)]

        return super_loss
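The `lambertw` line implements the SuperLoss closed form: sigma* = exp(-W(0.5 * max(cap, beta))) with beta = (loss - tau) / lam. A standalone NumPy/SciPy sketch, assuming `self.cap` is the -2/e domain bound from the SuperLoss paper (the tau, lam, and loss values below are invented):

```python
import numpy as np
from scipy.special import lambertw

# Standalone sketch of the closed form in forward() above, assuming
# cap = -2/e (the Lambert-W domain bound from the SuperLoss paper).
def super_loss(base_loss, tau, lam):
    beta = (base_loss - tau) / lam
    ln_sigma = -np.real(lambertw(0.5 * np.maximum(-2.0 / np.e, beta)))
    sigma = np.exp(ln_sigma)  # confidence: >1 for easy samples, <1 for hard ones
    return (base_loss - tau) * sigma + lam * ln_sigma ** 2, sigma

losses = np.array([0.3, 1.0, 2.5])  # invented per-sample losses
sl, conf = super_loss(losses, tau=1.0, lam=1.0)
print(sl, conf)  # hard samples (loss > tau) are down-weighted via conf < 1
```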
Code example #8
def update_model(
        model: Union[tf.keras.Model, torch.nn.Module],
        loss: Union[None, tf.Tensor, torch.Tensor] = None,
        gradients: Optional[List[Union[tf.Tensor, torch.Tensor]]] = None,
        tape: Optional[tf.GradientTape] = None,
        retain_graph: bool = True,
        scaler: Optional[torch.cuda.amp.GradScaler] = None,
        defer: bool = False,
        deferred: Optional[Dict[str, List[Callable[[],
                                                   None]]]] = None) -> None:
    """Update `model` weights based on a given `loss`.

    This method can be used with TensorFlow models:
    ```python
    m = fe.build(fe.architecture.tensorflow.LeNet, optimizer_fn="adam")
    x = tf.ones((3,28,28,1))  # (batch, height, width, channels)
    y = tf.constant((1, 0, 1))
    with tf.GradientTape(persistent=True) as tape:
        pred = fe.backend.feed_forward(m, x)  # [[~0.5, ~0.5], [~0.5, ~0.5], [~0.5, ~0.5]]
        loss = fe.backend.sparse_categorical_crossentropy(y_pred=pred, y_true=y)  # ~2.3
        fe.backend.update_model(m, loss=loss, tape=tape)
    ```

    This method can be used with PyTorch models:
    ```python
    m = fe.build(fe.architecture.pytorch.LeNet, optimizer_fn="adam")
    x = torch.ones((3,1,28,28))  # (batch, channels, height, width)
    y = torch.tensor((1, 0, 1))
    pred = fe.backend.feed_forward(m, x)  # [[~0.5, ~0.5], [~0.5, ~0.5], [~0.5, ~0.5]]
    loss = fe.backend.sparse_categorical_crossentropy(y_pred=pred, y_true=y)  # ~2.3
    fe.backend.update_model(m, loss=loss)
    ```

    Args:
        model: A neural network instance to update.
        loss: A loss value to compute gradients from, mutually exclusive with `gradients`.
        gradients: A list of tensors to update the models, mutually exclusive with `loss`.
        tape: A TensorFlow GradientTape which was recording when the `loss` was computed (iff using TensorFlow).
        retain_graph: Whether to keep the model graph in memory (applicable only for PyTorch).
        scaler: A PyTorch loss scaler that scales loss when PyTorch mixed precision is used.
        defer: If True, then the model update function will be stored into the `deferred` dictionary rather than
            applied immediately.
        deferred: A dictionary in which model update functions are stored.

    Raises:
        ValueError: If `model` is an unacceptable data type.
        RuntimeError: If attempting to modify a PyTorch model which relied on gradients within a different PyTorch model
            which has in turn already undergone a non-deferred update.
    """
    if loss is not None:
        loss = reduce_mean(loss)
    if isinstance(model, tf.keras.Model):
        if loss is not None:
            # scale up loss for mixed precision training to avoid underflow
            if isinstance(model.current_optimizer,
                          mixed_precision.LossScaleOptimizer):
                loss = model.current_optimizer.get_scaled_loss(loss)
            # for multi-gpu training, the gradient will be combined by sum, normalize the loss
            strategy = tf.distribute.get_strategy()
            if isinstance(strategy, tf.distribute.MirroredStrategy):
                loss = loss / strategy.num_replicas_in_sync
            gradients = get_gradient(loss,
                                     model.trainable_variables,
                                     tape=tape)
        with tape.stop_recording():
            # scale down gradient to balance scale-up loss
            if isinstance(model.current_optimizer,
                          mixed_precision.LossScaleOptimizer):
                gradients = model.current_optimizer.get_unscaled_gradients(
                    gradients)
            if defer:
                deferred.setdefault(
                    model.model_name,
                    []).append(lambda: model.current_optimizer.apply_gradients(
                        zip(gradients, model.trainable_variables)))
            else:
                model.current_optimizer.apply_gradients(
                    zip(gradients, model.trainable_variables))
    elif isinstance(model, torch.nn.Module):
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        if loss is not None:
            # scale up loss for mixed precision training to avoid underflow
            if scaler is not None:
                loss = scaler.scale(loss)
            try:
                gradients = get_gradient(loss,
                                         trainable_params,
                                         retain_graph=retain_graph)
            except RuntimeError as err:
                if err.args and isinstance(
                        err.args[0], str
                ) and err.args[0].startswith(
                        'one of the variables needed for gradient computation has been modified by an inplace operation'
                ):
                    raise RuntimeError(
                        "When computing gradients for '{}', some variables it relied on during the forward pass had"
                        " been updated. Consider setting defer=True in earlier UpdateOps related to models which "
                        "interact with this one.".format(model.model_name))
                raise err
        for gradient, parameter in zip(gradients, trainable_params):
            if parameter.grad is not None:
                parameter.grad += gradient
            else:
                parameter.grad = gradient.clone()
        if defer:
            # Only need to call once per model since gradients are getting accumulated
            deferred[model.model_name] = [
                lambda: _torch_step(model.current_optimizer, scaler)
            ]
        else:
            _torch_step(model.current_optimizer, scaler)

            if deferred:
                deferred.pop(model.model_name,
                             None)  # Don't need those deferred steps anymore
    else:
        raise ValueError("Unrecognized model instance {}".format(type(model)))
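The `defer`/`deferred` mechanism stores the actual optimizer step as a zero-argument callable keyed by model name, to be flushed later once every interacting model has finished its backward pass. A minimal sketch of that contract (the dictionary and helper below are illustrative, not FastEstimator API):

```python
from typing import Callable, Dict, List

# Illustrative stand-in for an optimizer step; not FastEstimator API.
def fake_step(name: str) -> None:
    print(f"applying deferred update for {name}")

deferred: Dict[str, List[Callable[[], None]]] = {}

# During the training step, updates are recorded instead of applied:
deferred.setdefault("model_a", []).append(lambda: fake_step("model_a"))
deferred.setdefault("model_b", []).append(lambda: fake_step("model_b"))

# Once it is safe to mutate the weights, the stored callables are flushed:
for updates in deferred.values():
    for update in updates:
        update()
deferred.clear()
```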
Code example #9
    def forward(self, data: List[Tensor], state: Dict[str, Any]) -> Tensor:
        y_pred, y_true = data
        loss = mean_squared_error(y_true=y_true, y_pred=y_pred)
        if self.average_loss:
            loss = reduce_mean(loss)
        return loss
Code example #10
def binary_crossentropy(y_pred: Tensor,
                        y_true: Tensor,
                        from_logits: bool = False,
                        average_loss: bool = True) -> Tensor:
    """Compute binary crossentropy.

    This method is applicable when there are only two label classes (zero and one). There should be a single floating
    point prediction per example.

    This method can be used with TensorFlow tensors:
    ```python
    true = tf.constant([[1], [0], [1], [0]])
    pred = tf.constant([[0.9], [0.3], [0.8], [0.1]])
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true)  # 0.197
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true, average_loss=False)  # [0.105, 0.356, 0.223, 0.105]
    ```

    This method can be used with PyTorch tensors:
    ```python
    true = torch.tensor([[1], [0], [1], [0]])
    pred = torch.tensor([[0.9], [0.3], [0.8], [0.1]])
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true)  # 0.197
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true, average_loss=False)  # [0.105, 0.356, 0.223, 0.105]
    ```

    Args:
        y_pred: Prediction with a shape like (batch, ...). dtype: float32 or float16.
        y_true: Ground truth class labels with the same shape as `y_pred`. dtype: int or float32 or float16.
        from_logits: Whether y_pred is from logits. If True, a sigmoid will be applied to the prediction.
        average_loss: Whether to average the element-wise loss.

    Returns:
        The binary crossentropy between `y_pred` and `y_true`. A scalar if `average_loss` is True, else a tensor with
        the shape (Batch).

    Raises:
        AssertionError: If `y_true` or `y_pred` are unacceptable data types.
    """
    assert type(y_pred) is type(
        y_true), "y_pred and y_true must be same tensor type"
    assert isinstance(y_pred, torch.Tensor) or tf.is_tensor(
        y_pred), "only support tf.Tensor or torch.Tensor as y_pred"
    assert isinstance(y_true, torch.Tensor) or tf.is_tensor(
        y_true), "only support tf.Tensor or torch.Tensor as y_true"
    if tf.is_tensor(y_pred):
        ce = tf.losses.binary_crossentropy(y_pred=y_pred,
                                           y_true=tf.reshape(
                                               y_true, y_pred.shape),
                                           from_logits=from_logits)
        ce = tf.reshape(ce, [ce.shape[0], -1])
        ce = tf.reduce_mean(ce, 1)
    else:
        y_true = y_true.to(torch.float)
        if from_logits:
            ce = torch.nn.BCEWithLogitsLoss(reduction="none")(
                input=y_pred, target=y_true.view(y_pred.size()))
        else:
            ce = torch.nn.BCELoss(reduction="none")(input=y_pred,
                                                    target=y_true.view(
                                                        y_pred.size()))
        ce = ce.view(ce.shape[0], -1)
        ce = torch.mean(ce, dim=1)

    if average_loss:
        ce = reduce_mean(ce)
    return ce
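The documented outputs follow directly from the definition: for a single sigmoid output, BCE is -log(p) when y = 1 and -log(1 - p) when y = 0. A NumPy check (values match the docstring up to rounding):

```python
import numpy as np

# Hand-check of the documented values.
true = np.array([1, 0, 1, 0], dtype=np.float32)
pred = np.array([0.9, 0.3, 0.8, 0.1], dtype=np.float32)
bce = -(true * np.log(pred) + (1 - true) * np.log(1 - pred))
print(bce)         # ~[0.105 0.357 0.223 0.105] (average_loss=False)
print(bce.mean())  # ~0.197 (average_loss=True)
```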
Code example #11
def categorical_crossentropy(
        y_pred: Tensor,
        y_true: Tensor,
        from_logits: bool = False,
        average_loss: bool = True,
        class_weights: Optional[Weight_Dict] = None) -> Tensor:
    """Compute categorical crossentropy.

    Note that if any of the `y_pred` values are exactly 0, this will result in a NaN output. If `from_logits` is
    False, then each entry of `y_pred` should sum to 1. If they don't sum to 1, the tf and torch backends will
    produce different numerical values.

    This method can be used with TensorFlow tensors:
    ```python
    true = tf.constant([[0, 1, 0], [1, 0, 0], [0, 0, 1]])
    pred = tf.constant([[0.1, 0.8, 0.1], [0.9, 0.05, 0.05], [0.1, 0.2, 0.7]])
    weights = tf.lookup.StaticHashTable(
        tf.lookup.KeyValueTensorInitializer(tf.constant([1, 2]), tf.constant([2.0, 3.0])), default_value=1.0)
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true)  # 0.228
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true, average_loss=False)  # [0.223, 0.105, 0.356]
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true, average_loss=False, class_weights=weights)
    # [0.446, 0.105, 1.068]
    ```

    This method can be used with PyTorch tensors:
    ```python
    true = torch.tensor([[0, 1, 0], [1, 0, 0], [0, 0, 1]])
    pred = torch.tensor([[0.1, 0.8, 0.1], [0.9, 0.05, 0.05], [0.1, 0.2, 0.7]])
    weights = {1: 2.0, 2: 3.0}
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true)  # 0.228
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true, average_loss=False)  # [0.223, 0.105, 0.356]
    b = fe.backend.categorical_crossentropy(y_pred=pred, y_true=true, average_loss=False, class_weights=weights)
    # [0.446, 0.105, 1.068]
    ```

    Args:
        y_pred: Prediction with a shape like (Batch, C). dtype: float32 or float16.
        y_true: Ground truth class labels with a shape like `y_pred`. dtype: int or float32 or float16.
        from_logits: Whether y_pred is from logits. If True, a softmax will be applied to the prediction.
        average_loss: Whether to average the element-wise loss.
        class_weights: Mapping of class indices to a weight for weighting the loss function. Useful when you need to pay
            more attention to samples from an under-represented class.

    Returns:
        The categorical crossentropy between `y_pred` and `y_true`. A scalar if `average_loss` is True, else a
        tensor with the shape (Batch).

    Raises:
        AssertionError: If `y_true` or `y_pred` are unacceptable data types.
    """
    assert isinstance(
        y_pred,
        (tf.Tensor,
         torch.Tensor)), "only support tf.Tensor or torch.Tensor as y_pred"
    assert isinstance(
        y_true,
        (tf.Tensor,
         torch.Tensor)), "only support tf.Tensor or torch.Tensor as y_true"
    if tf.is_tensor(y_pred):
        ce = tf.losses.categorical_crossentropy(y_pred=y_pred,
                                                y_true=y_true,
                                                from_logits=from_logits)
        if class_weights is not None:
            sample_weights = class_weights.lookup(
                tf.math.argmax(y_true,
                               axis=-1,
                               output_type=class_weights.key_dtype))
            ce = ce * sample_weights
    else:
        y_true = y_true.to(torch.float)
        ce = _categorical_crossentropy_torch(y_pred=y_pred,
                                             y_true=y_true,
                                             from_logits=from_logits)
        if class_weights is not None:
            y_class = torch.argmax(y_true, dim=-1)
            sample_weights = torch.ones_like(y_class, dtype=torch.float)
            for key in class_weights.keys():
                sample_weights[y_class == key] = class_weights[key]
            ce = ce * sample_weights.reshape(ce.shape)

    if average_loss:
        ce = reduce_mean(ce)
    return ce
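The weighted outputs above are just the unweighted per-sample losses scaled by the weight of each sample's true class. A quick check, reusing the documented unweighted values:

```python
import numpy as np

# Hand-check of the class_weights example: per-sample losses scaled by the
# weight of the true class (class 1 weighted 2x, class 2 weighted 3x).
per_sample = np.array([0.223, 0.105, 0.356])  # unweighted values from above
true_class = np.array([1, 0, 2])              # argmax of the one-hot labels
weights = {1: 2.0, 2: 3.0}
scaled = per_sample * np.array([weights.get(c, 1.0) for c in true_class])
print(scaled)  # [0.446 0.105 1.068] -- matches the documented output
```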
Code example #12
def binary_crossentropy(y_pred: Tensor,
                        y_true: Tensor,
                        from_logits: bool = False,
                        average_loss: bool = True,
                        class_weights: Optional[Weight_Dict] = None) -> Tensor:
    """Compute binary crossentropy.

    This method is applicable when there are only two label classes (zero and one). There should be a single floating
    point prediction per example.

    This method can be used with TensorFlow tensors:
    ```python
    true = tf.constant([[1], [0], [1], [0]])
    pred = tf.constant([[0.9], [0.3], [0.8], [0.1]])
    weights = tf.lookup.StaticHashTable(
        tf.lookup.KeyValueTensorInitializer(tf.constant([1]), tf.constant([2.0])), default_value=1.0)
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true)  # 0.197
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true, average_loss=False)  # [0.105, 0.356, 0.223, 0.105]
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true, average_loss=False, class_weights=weights)
    # [0.210, 0.356, 0.446, 0.105]
    ```

    This method can be used with PyTorch tensors:
    ```python
    true = torch.tensor([[1], [0], [1], [0]])
    pred = torch.tensor([[0.9], [0.3], [0.8], [0.1]])
    weights = {1: 2.0}
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true)  # 0.197
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true, average_loss=False)  # [0.105, 0.356, 0.223, 0.105]
    b = fe.backend.binary_crossentropy(y_pred=pred, y_true=true, average_loss=False, class_weights=weights)
    # [0.210, 0.356, 0.446, 0.105]
    ```

    Args:
        y_pred: Prediction with a shape like (batch, ...). dtype: float32 or float16.
        y_true: Ground truth class labels with the same shape as `y_pred`. dtype: int or float32 or float16.
        from_logits: Whether y_pred is from logits. If True, a sigmoid will be applied to the prediction.
        average_loss: Whether to average the element-wise loss.
        class_weights: Mapping of class indices to a weight for weighting the loss function. Useful when you need to pay
            more attention to samples from an under-represented class.

    Returns:
        The binary crossentropy between `y_pred` and `y_true`. A scalar if `average_loss` is True, else a tensor with
        the shape (Batch).

    Raises:
        AssertionError: If `y_true` or `y_pred` are unacceptable data types.
    """
    assert isinstance(y_pred, torch.Tensor) or tf.is_tensor(
        y_pred), "only support tf.Tensor or torch.Tensor as y_pred"
    assert isinstance(y_true, torch.Tensor) or tf.is_tensor(
        y_true), "only support tf.Tensor or torch.Tensor as y_true"
    if tf.is_tensor(y_pred):
        ce = tf.losses.binary_crossentropy(y_pred=y_pred,
                                           y_true=tf.reshape(
                                               y_true, tf.shape(y_pred)),
                                           from_logits=from_logits)
        if class_weights is not None:
            sample_weights = class_weights.lookup(
                tf.cast(tf.reshape(y_true, tf.shape(ce)),
                        dtype=class_weights.key_dtype))
            ce = ce * sample_weights

        ce = tf.reshape(ce, [tf.shape(ce)[0], -1])
        ce = tf.reduce_mean(ce, 1)
    else:
        y_true = y_true.to(torch.float)
        if from_logits:
            ce = torch.nn.BCEWithLogitsLoss(reduction="none")(
                input=y_pred, target=y_true.view(y_pred.size()))
        else:
            ce = torch.nn.BCELoss(reduction="none")(input=y_pred,
                                                    target=y_true.view(
                                                        y_pred.size()))

        if class_weights is not None:
            sample_weights = torch.ones_like(y_true, dtype=torch.float)
            for key in class_weights.keys():
                sample_weights[y_true == key] = class_weights[key]
            ce = ce * sample_weights.reshape(ce.shape)

        ce = ce.view(ce.shape[0], -1)
        ce = torch.mean(ce, dim=1)

    if average_loss:
        ce = reduce_mean(ce)
    return ce