Code example #1
def weighted_smooth_l1_loss(input, target, weights, size_average=None, reduce=None, reduction="mean"):
    # type: (Tensor, Tensor, Tensor, Optional[bool], Optional[bool], str) -> Tensor
    r"""Function that uses a squared term if the absolute
    element-wise error falls below 1 and an L1 term otherwise.

    See :class:`~torch.nn.SmoothL1Loss` for details.
    """
    if not (target.size() == input.size()):
        warnings.warn(
            "Using a target size ({}) that is different to the input size ({}). "
            "This will likely lead to incorrect results due to broadcasting. "
            "Please ensure they have the same size.".format(target.size(), input.size()),
            stacklevel=2,
        )
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    if target.requires_grad:
        ret = _weighted_smooth_l1_loss(input, target, weights)
        if reduction != "none":
            ret = torch.mean(ret) if reduction == "mean" else torch.sum(ret)
    else:
        # The author left this branch unfinished; everything after the raise is unreachable.
        raise ValueError("haven't thought this through")
        expanded_input, expanded_target = torch.broadcast_tensors(input, target)
        ret = torch._C._nn.smooth_l1_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
    return ret
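A minimal usage sketch for the example above, assuming the `_weighted_smooth_l1_loss` helper it calls (not shown) applies `weights` element-wise to the smooth-L1 terms; note that, as written, only the `target.requires_grad` branch is implemented:

import torch

input = torch.randn(8, 4, requires_grad=True)
target = torch.randn(8, 4).requires_grad_(True)   # this variant only handles target.requires_grad == True
weights = torch.ones(8, 4)

# hypothetical call; _weighted_smooth_l1_loss is assumed to weight the per-element loss
loss = weighted_smooth_l1_loss(input, target, weights, reduction="mean")
loss.backward()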
Code example #2
    def __init__(self, kernel_size: int = 11, kernel_sigma: float = 1.5, k1: float = 0.01, k2: float = 0.03,
                 scale_weights: Optional[Union[Tuple[float], List[float]]] = None, size_average: Optional[bool] = None,
                 reduce: Optional[bool] = None, reduction: str = 'mean', data_range: Union[int, float] = 1.) -> None:
        super(MultiScaleSSIMLoss, self).__init__(size_average, reduce, reduction)

        # Generic loss parameters.
        self.size_average = size_average
        self.reduce = reduce
        if size_average is not None or reduce is not None:
            reduction = _Reduction.legacy_get_string(size_average, reduce)

        self.reduction = reduction

        # Loss-specific parameters.
        if scale_weights is None:
            scale_weights_from_ms_ssim_paper = [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]
            scale_weights = scale_weights_from_ms_ssim_paper

        self.scale_weights_tensor = torch.tensor(scale_weights)
        self.kernel_size = kernel_size
        self.kernel_sigma = kernel_sigma
        self.k1 = k1
        self.k2 = k2
        self.data_range = data_range

        # Cache the kernel between calls.
        self.kernel = _fspecial_gauss_1d(kernel_size, kernel_sigma)
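A usage sketch, assuming this constructor belongs to a `MultiScaleSSIMLoss` module whose `forward` (not shown in this example) compares two image batches scaled to `[0, data_range]`; the default scale weights are the values reported in the original MS-SSIM paper:

import torch

# hypothetical usage; the module's forward pass is not part of this example
loss_fn = MultiScaleSSIMLoss(kernel_size=11, kernel_sigma=1.5, data_range=1.)
x = torch.rand(4, 3, 256, 256)   # prediction in [0, 1]
y = torch.rand(4, 3, 256, 256)   # reference in [0, 1]
loss = loss_fn(x, y)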
Code example #3
def ssim(input: Tensor,
         target: Tensor,
         max_val: float,
         filter_size: int = 11,
         k1: float = 0.01,
         k2: float = 0.03,
         sigma: float = 1.5,
         size_average=None,
         reduce=None,
         reduction: str = 'mean') -> Tensor:
    """Measures the structural similarity index (SSIM) error."""
    dim = input.dim()
    if dim != 4:
        raise ValueError('Expected 4 dimensions (got {})'.format(dim))

    if input.size() != target.size():
        raise ValueError(
            'Expected input size ({}) to match target size ({}).'.format(
                input.size(), target.size()))

    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    channel = input.size(1)
    kernel = _fspecial_gaussian(filter_size,
                                channel,
                                sigma,
                                device=input.device,
                                dtype=input.dtype,
                                max_size=input.shape[-2:])
    ret, _ = _ssim(input, target, max_val, k1, k2, channel, kernel)

    if reduction != 'none':
        ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
    return ret
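A usage sketch for the functional `ssim` above, assuming the `_fspecial_gaussian` and `_ssim` helpers it references are in scope; inputs must be 4-D `(N, C, H, W)` batches of the same size:

import torch

x = torch.rand(2, 3, 64, 64)   # (N, C, H, W), values in [0, 1]
y = torch.rand(2, 3, 64, 64)

score = ssim(x, y, max_val=1.0)                        # mean-reduced over the batch
unreduced = ssim(x, y, max_val=1.0, reduction='none')  # whatever _ssim returns, left unreduced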
Code example #4
def cross_entropy(input, target, weight=None, size_average=None, ignore_index=-100,
                  reduce=None, reduction='mean'):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[bool], int, Optional[bool], str) -> Tensor

    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
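Since this wrapper is just `log_softmax` followed by `nll_loss` (assumed here to be the functional versions from `torch.nn.functional`), it should behave like `F.cross_entropy` on raw logits:

import torch

logits = torch.randn(3, 5, requires_grad=True)   # (N, C) unnormalized scores
targets = torch.randint(5, (3,))                  # class indices in [0, C)

loss = cross_entropy(logits, targets)             # defaults to reduction='mean'
loss.backward()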
Code example #5
    def SmoothL1Loss_custom(input,
                            target,
                            sigma=3.0,
                            size_average=None,
                            reduce=None,
                            reduction='mean'):
        # type: (Tensor, Tensor, float, Optional[bool], Optional[bool], str) -> Tensor
        def _smooth_l1_loss(input, target, sigma=3.0):
            # type: (Tensor, Tensor, float) -> Tensor
            # Note: sigma is accepted but unused; the switch point is hard-coded at 1.
            t = torch.abs(input - target)
            return torch.where(t < 1, 0.5 * t**2, t - 0.5)

        if not (target.size() == input.size()):
            warnings.warn(
                "Using a target size ({}) that is different to the input size ({}). "
                "This will likely lead to incorrect results due to broadcasting. "
                "Please ensure they have the same size.".format(
                    target.size(), input.size()),
                stacklevel=2)
        if size_average is not None or reduce is not None:
            reduction = _Reduction.legacy_get_string(size_average, reduce)
        if target.requires_grad:
            ret = _smooth_l1_loss(input, target)
            if reduction != 'none':
                ret = torch.mean(ret) if reduction == 'mean' else torch.sum(
                    ret)
        else:
            expanded_input, expanded_target = torch.broadcast_tensors(
                input, target)
            ret = torch._C._nn.smooth_l1_loss(expanded_input, expanded_target,
                                              _Reduction.get_enum(reduction))
        return ret
Code example #6
    def __init__(self,
                 size_average=None,
                 reduce=None,
                 reduction: str = 'mean') -> None:
        super(_Loss, self).__init__()
        if size_average is not None or reduce is not None:
            self.reduction = _Reduction.legacy_get_string(size_average, reduce)
        else:
            self.reduction = reduction
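This `__init__` belongs to the `_Loss` base class that several modules in this collection call through `super().__init__(size_average, reduce, reduction)`. A minimal sketch of how a subclass consumes the resolved `self.reduction`, assuming `_Loss` derives from `nn.Module` as in torch.nn (the subclass below is hypothetical):

import torch
import torch.nn.functional as F

class L1Example(_Loss):
    # hypothetical subclass, shown only to illustrate how self.reduction is used
    def forward(self, input, target):
        return F.l1_loss(input, target, reduction=self.reduction)

criterion = L1Example(size_average=None, reduce=None, reduction='sum')
loss = criterion(torch.randn(4, 3), torch.randn(4, 3))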
Code example #7
def nll_loss(input,
             target,
             weight=None,
             size_average=None,
             ignore_index=-100,
             reduce=None,
             reduction='mean'):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[bool], int, Optional[bool], str) -> Tensor

    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    dim = input.dim()
    if dim < 2:
        raise ValueError('Expected 2 or more dimensions (got {})'.format(dim))

    if input.size(0) != target.size(0):
        raise ValueError(
            'Expected input batch_size ({}) to match target batch_size ({}).'.
            format(input.size(0), target.size(0)))
    if dim == 2:
        ret = torch._C._nn.nll_loss(input, target, weight,
                                    _Reduction.get_enum(reduction),
                                    ignore_index)
    elif dim == 4:
        ret = torch._C._nn.nll_loss2d(input, target, weight,
                                      _Reduction.get_enum(reduction),
                                      ignore_index)
    else:
        # dim == 3 or dim > 4
        n = input.size(0)
        c = input.size(1)
        out_size = (n, ) + input.size()[2:]
        if target.size()[1:] != input.size()[2:]:
            raise ValueError('Expected target size {}, got {}'.format(
                out_size, target.size()))
        input = input.contiguous()
        target = target.contiguous()
        # support empty batches, see #15870
        if input.numel() > 0:
            input = input.view(n, c, 1, -1)
        else:
            input = input.view(n, c, 0, 0)
        if target.numel() > 0:
            target = target.view(n, 1, -1)
        else:
            target = target.view(n, 0, 0)
        reduction_enum = _Reduction.get_enum(reduction)
        if reduction != 'none':
            ret = torch._C._nn.nll_loss2d(input, target, weight,
                                          reduction_enum, ignore_index)
        else:
            out = torch._C._nn.nll_loss2d(input, target, weight,
                                          reduction_enum, ignore_index)
            ret = out.view(out_size)
    return ret
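A usage sketch for the `nll_loss` wrapper above: it expects log-probabilities, so it is normally fed the output of `log_softmax`. The `dim == 3 or dim > 4` branch flattens the trailing dimensions so that `nll_loss2d` can handle arbitrary K-dimensional targets; the shapes below are illustrative only:

import torch
import torch.nn.functional as F

# standard 2-D case: (N, C) log-probabilities, (N,) class indices
log_probs = F.log_softmax(torch.randn(4, 10), dim=1)
targets = torch.randint(10, (4,))
loss = nll_loss(log_probs, targets)

# K-dimensional case, e.g. per-frame classification: (N, C, T) and (N, T)
log_probs_3d = F.log_softmax(torch.randn(4, 10, 7), dim=1)
targets_3d = torch.randint(10, (4, 7))
loss_3d = nll_loss(log_probs_3d, targets_3d)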
Code example #8
def ms_ssim(input: Tensor,
            target: Tensor,
            max_val: float,
            filter_size: int = 11,
            k1: float = 0.01,
            k2: float = 0.03,
            sigma: float = 1.5,
            size_average=None,
            reduce=None,
            reduction: str = 'mean') -> Tensor:
    """Measures the multi-scale structural similarity index (MS-SSIM) error."""
    dim = input.dim()
    if dim != 4:
        raise ValueError(
            'Expected 4 dimensions (got {}) from input'.format(dim))

    if input.size() != target.size():
        raise ValueError(
            'Expected input size ({}) to match target size ({}).'.format(
                input.size(), target.size()))

    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    channel = input.size(1)
    kernel = _fspecial_gaussian(filter_size,
                                channel,
                                sigma,
                                device=input.device,
                                dtype=input.dtype,
                                max_size=input.shape[-2:])

    weights = ms_weights(input.device).unsqueeze(-1).unsqueeze(-1)
    levels = weights.size(0)
    mssim = []
    mcs = []
    for i in range(levels):

        if i:
            input = avg_pool2d(input, kernel_size=2, ceil_mode=True)
            target = avg_pool2d(target, kernel_size=2, ceil_mode=True)

        if min(size := input.shape[-2:]) <= filter_size:
            kernel = _fspecial_gaussian(filter_size,
                                        channel,
                                        sigma,
                                        device=input.device,
                                        dtype=input.dtype,
                                        max_size=size)

        ssim, cs = _ssim(input, target, max_val, k1, k2, channel, kernel)
        ssim = ssim.mean((2, 3))
        cs = cs.mean((2, 3))
        mssim.append(ssim)
        mcs.append(cs)
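    # NOTE: the example is truncated here; what follows is a hypothetical sketch of
    # the usual MS-SSIM combination step (the original continuation is not shown).
    mssim = torch.stack(mssim)   # (levels, N, C)
    mcs = torch.stack(mcs)       # (levels, N, C)
    ret = torch.prod(mcs[:-1] ** weights[:-1], dim=0) * mssim[-1] ** weights[-1]
    if reduction != 'none':
        ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
    return ret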
Code example #9
File: multibox_loss_ex.py  Project: 584669/code
def smooth_ex_loss(input, target, size_average=None, reduce=None, reduction='mean'):
    if not (target.size() == input.size()):
        warnings.warn("Using a target size ({}) that is different to the input size ({}). "
                      "This will likely lead to incorrect results due to broadcasting. "
                      "Please ensure they have the same size.".format(target.size(), input.size()),
                      stacklevel=2)
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    ret = _smooth_ex_loss(input, target)
    if reduction != 'none':
        ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
    return ret
Code example #10
    def forward(self, input: torch.Tensor, target):
        reduction = self.reduction
        if self.size_average is not None or self.reduce is not None:
            reduction = _Reduction.legacy_get_string(self.size_average,
                                                     self.reduce)
        pt = F.softmax(input, dim=1)          # class probabilities
        log_pt = torch.log(pt)
        w = torch.pow((1 - pt), self.gamma)   # focal modulation (1 - p)^gamma, per class
        f_l = F.nll_loss(w * log_pt, target, self.weight, None,
                         self.ignore_index, None, reduction)

        return f_l
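A usage sketch for the focal-loss `forward` above, assuming it belongs to a module (called `FocalLoss` here purely for illustration) whose constructor sets `gamma`, `weight`, `ignore_index`, `size_average`, `reduce` and `reduction`; that constructor is not part of this example:

import torch

# hypothetical module holding the forward() shown above
criterion = FocalLoss(gamma=2.0)
logits = torch.randn(8, 5, requires_grad=True)   # raw scores; forward applies softmax itself
targets = torch.randint(5, (8,))
loss = criterion(logits, targets)
loss.backward()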
Code example #11
def smoothed_nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean',
                      smooth_eps=None, smooth_dist=None, size_average=None, reduce=None):
    """cross entropy loss, with support for target distributions and label smoothing https://arxiv.org/abs/1512.00567"""

    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    smooth_eps = smooth_eps or 0

    # ordinary log-likelihood - defer to F.nll_loss (inputs are assumed to be log-probabilities)
    if _is_long(target) and smooth_eps == 0:
        return F.nll_loss(inputs, target, weight, ignore_index=ignore_index, reduction=reduction)

    lsm = inputs

    masked_indices = None
    num_classes = inputs.size(-1)

    if _is_long(target) and ignore_index >= 0:
        masked_indices = target.eq(ignore_index)

    if smooth_eps > 0 and smooth_dist is not None:
        if _is_long(target):
            target = onehot(target, num_classes).type_as(inputs)
        if smooth_dist.dim() < target.dim():
            smooth_dist = smooth_dist.unsqueeze(0)
        target.lerp_(smooth_dist, smooth_eps)

    if weight is not None:
        lsm = lsm * weight.unsqueeze(0)

    if _is_long(target):
        eps_sum = smooth_eps / num_classes
        eps_nll = 1. - eps_sum - smooth_eps
        likelihood = lsm.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
        loss = -(eps_nll * likelihood + eps_sum * lsm.sum(-1))
    else:
        loss = -(target * lsm).sum(-1)

    if masked_indices is not None:
        loss.masked_fill_(masked_indices, 0)

    if reduction == 'sum':
        loss = loss.sum()
    elif reduction == 'mean':
        if masked_indices is None:
            loss = loss.mean()
        else:
            loss = loss.sum() / float(loss.size(0) - masked_indices.sum())

    return loss
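A usage sketch; the variable name `lsm` and the `gather`-based likelihood suggest `inputs` should already be log-probabilities, so the call is typically paired with `log_softmax`. The `onehot` and `_is_long` helpers referenced above are assumed to be in scope:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10, requires_grad=True)
targets = torch.randint(10, (4,))

log_probs = F.log_softmax(logits, dim=-1)
loss = smoothed_nll_loss(log_probs, targets, smooth_eps=0.1)   # label smoothing of 0.1
loss.backward()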
Code example #12
File: dice_loss.py  Project: kadimakipp/network
    def __init__(self,
                 weight=None,
                 smooth=True,
                 size_average=None,
                 reduce=None,
                 reduction='mean'):
        super(DiceLoss, self).__init__()
        if size_average is not None or reduce is not None:
            self.reduction = _Reduction.legacy_get_string(size_average, reduce)
        else:
            self.reduction = reduction
        if smooth:
            self.smooth = 1.
        else:
            self.smooth = 0.
        self.weight = weight
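The `forward` of `DiceLoss` is not part of this example. Purely as a hypothetical sketch of how the `smooth` and `reduction` fields set above are typically consumed in a soft-Dice loss (not the author's code; `weight` is left unused here):

import torch

def dice_forward_sketch(self, input, target):
    # hypothetical forward, not taken from the example above
    probs = torch.sigmoid(input).flatten(1)      # (N, *) -> (N, P)
    target = target.flatten(1).float()
    intersection = (probs * target).sum(dim=1)
    union = probs.sum(dim=1) + target.sum(dim=1)
    dice = (2. * intersection + self.smooth) / (union + self.smooth)
    loss = 1. - dice
    if self.reduction == 'mean':
        return loss.mean()
    if self.reduction == 'sum':
        return loss.sum()
    return loss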
Code example #13
def ssim_loss(input,
              target,
              max_val,
              filter_size=11,
              k1=0.01,
              k2=0.03,
              sigma=1.5,
              kernel=None,
              size_average=None,
              reduce=None,
              reduction='mean'):
    r"""ssim_loss(input, target, max_val, filter_size, k1, k2,
                  sigma, kernel=None, size_average=None, reduce=None, reduction='mean') -> Tensor
    Measures the structural similarity index (SSIM) error.
    See :class:`~torch.nn.SSIMLoss` for details.
    """

    if input.size() != target.size():
        raise ValueError(
            'Expected input size ({}) to match target size ({}).'.format(
                input.size(), target.size()))

    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    dim = input.dim()
    if dim == 2:
        input = input.expand(1, 1, input.size(-2), input.size(-1))
        target = target.expand(1, 1, target.size(-2), target.size(-1))
    elif dim == 3:
        input = input.expand(1, input.size(-3), input.size(-2), input.size(-1))
        target = target.expand(1, target.size(-3), target.size(-2),
                               target.size(-1))
    elif dim != 4:
        raise ValueError('Expected 2, 3, or 4 dimensions (got {})'.format(dim))

    _, channel, _, _ = input.size()

    if kernel is None:
        kernel = _fspecial_gaussian(filter_size, channel, sigma)
    kernel = kernel.to(device=input.device)

    ret, _ = _ssim(input, target, max_val, k1, k2, channel, kernel)

    if reduction != 'none':
        ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
    return ret
Code example #14
    def __init__(self, kernel_size: int = 11, kernel_sigma: float = 1.5, k1: float = 0.01, k2: float = 0.03,
                 size_average: Optional[bool] = None, reduce: Optional[bool] = None,
                 reduction: str = 'mean', data_range: Union[int, float] = 1.) -> None:
        super(SSIMLoss, self).__init__(size_average, reduce, reduction)

        # Generic loss parameters.
        self.size_average = size_average
        self.reduce = reduce
        if size_average is not None or reduce is not None:
            reduction = _Reduction.legacy_get_string(size_average, reduce)

        self.reduction = reduction

        # Loss-specific parameters.
        self.kernel_size = kernel_size
        self.kernel_sigma = kernel_sigma
        self.k1 = k1
        self.k2 = k2
        self.data_range = data_range

        # Cache the kernel between calls.
        self.kernel = _fspecial_gauss_1d(kernel_size, kernel_sigma)
Code example #15
File: ssim.py  Project: blackPython/mipae
def ssim_loss(input,
              target,
              max_val,
              filter_size=11,
              k1=0.01,
              k2=0.03,
              sigma=1.5,
              kernel=None,
              size_average=None,
              reduce=None,
              reduction='mean'):
    r"""ssim_loss(input, target, max_val, filter_size, k1, k2,
                  sigma, kernel=None, size_average=None, reduce=None, reduction='mean') -> Tensor
    Measures the structural similarity index (SSIM) error.
    See :class:`~torch.nn.SSIMLoss` for details.
    """

    if input.size() != target.size():
        raise ValueError(
            'Expected input size ({}) to match target size ({}).'.format(
                input.size(), target.size()))

    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    dim = input.dim()
    if dim == 2:
        input = input.expand(1, 1, input.size(-2), input.size(-1))
        target = target.expand(1, 1, target.size(-2), target.size(-1))
    elif dim == 3:
        input = input.expand(1, input.size(-3), input.size(-2), input.size(-1))
        target = target.expand(1, target.size(-3), target.size(-2),
                               target.size(-1))
    elif dim != 4:
        raise ValueError('Expected 2, 3, or 4 dimensions (got {})'.format(dim))

    _, channel, _, _ = input.size()

    if kernel is None:
        kernel = _fspecial_gaussian(filter_size, channel, sigma)
    kernel = kernel.to(device=input.device)

    ret, _ = _ssim(input, target, max_val, k1, k2, channel, kernel)

    if reduction != 'none':
        ret = torch.mean(ret) if reduction == 'mean' else torch.sum(ret)
    return ret


Code example #16
def cross_entropy_without_softmax(input, target, weight=None, size_average=None, ignore_index=-100,
                  reduce=None, reduction='mean'):
    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)
    return F.nll_loss(torch.log(input), target, weight, None, ignore_index, None, reduction)
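Unlike the earlier `cross_entropy` wrapper, this variant applies only `torch.log`, so `input` must already be a normalized probability distribution over classes, e.g. a softmax output:

import torch
import torch.nn.functional as F

probs = F.softmax(torch.randn(4, 5), dim=1)   # already-normalized class probabilities
targets = torch.randint(5, (4,))

loss = cross_entropy_without_softmax(probs, targets)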
Code example #17
def re_bow_loss(input,
                target,
                prior,
                kl_annealing=1.,
                weight=None,
                size_average=None,
                ignore_index=0,
                reduce=None,
                reduction='mean',
                _DEBUG=False):
    # type: (Tensor, Tensor, Distribution, float, Optional[Tensor], Optional[bool], int, Optional[bool], str, bool) -> Tuple[Tensor, ...]
    r"""This criterion combines `log` and word-wise (at sentence level) likelihood in a single
    function.

    See :class:`~torch.nn.CrossEntropyLoss` for details.

    # TODO: This needs to be re-written.
    Args:
        input (Tensor) : :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
            in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)` where :math:`K > 1`
            in the case of K-dimensional loss.
        target (Tensor) : :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`,
            or :math:`(N, d_1, d_2, ..., d_K)` where :math:`K \geq 1` for
            K-dimensional loss.
        weight (Tensor, optional): a manual rescaling weight given to each
            class. If given, has to be a Tensor of size `C`
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there are multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when reduce is ``False``. Default: ``True``
        ignore_index (int, optional): Specifies a target value that is ignored
            and does not contribute to the input gradient. When :attr:`size_average` is
            ``True``, the loss is averaged over non-ignored targets. Default: -100
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
            batch element instead and ignores :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            'none' | 'mean' | 'sum'. 'none': no reduction will be applied,
            'mean': the sum of the output will be divided by the number of
            elements in the output, 'sum': the output will be summed. Note: :attr:`size_average`
            and :attr:`reduce` are in the process of being deprecated, and in the meantime,
            specifying either of those two args will override :attr:`reduction`. Default: 'mean'

    Examples::

        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> target = torch.randint(5, (3,), dtype=torch.int64)
        >>> loss = bag_of_words_log_loss(input, target)
        >>> loss.backward()
    """

    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    relation_probs = input[0]
    word_probs = input[1]
    word_labels = target[1]

    # Creates a masked target tensor, where it is assumed that the labels indices will start at 1.
    mask = (word_labels != ignore_index).long()
    masked_word_labels = word_labels * mask

    # Here we compute the logarithm of the estimated word probabilities.
    log_word_probs = torch.log(word_probs)

    # Adds a column of zeros to the beginning of the log_word_probs. This will allow to batch the computation while
    # ignoring the 'ignore_idx'.
    masked_log_word_probs = torch.cat((torch.zeros(
        word_probs.shape[0], 1, device=mask.device), log_word_probs),
                                      dim=1)

    # Here we compute the expectation, with respect to the predicted relation probabilities, of the logarithm of the
    # predicted word probabilities.
    masked_expected_log_word_probs = torch.matmul(relation_probs,
                                                  masked_log_word_probs)

    # Gets the probabilities of the words in each sentence (including the ignore_index ones, but those won't contribute
    # to the result).
    expected_log_words_in_sentences_probs = torch.gather(
        masked_expected_log_word_probs, 1, masked_word_labels)

    # Computes the probability of each sequence.
    expected_sequence_log_prob = torch.sum(
        expected_log_words_in_sentences_probs, dim=1)

    # Here we compute the KL divergence of the predicted relation probabilities against the specified prior.
    kls = kl_divergence(Categorical(probs=relation_probs), prior)

    # If we are 'DEBUGGING', i.e. tracking KL and P(X) values, we compute those values here.
    if (_DEBUG):
        # We compute P(x) = sum_{r \in \mathcal{R}} P(r)*P(x|r) = \frac{1}{|\mathcal{R}|} sum_{r \in \mathcal{R}} P(x|r)
        # We actually compute the log of that quantity.

        # First we expand the word probabilities to allow for each training instance to select from each relation.
        ex_word_probs = masked_log_word_probs.unsqueeze(0).expand(
            masked_word_labels.shape[0], -1, -1)
        ex_word_probs = ex_word_probs.reshape(
            ex_word_probs.shape[0] * ex_word_probs.shape[1],
            ex_word_probs.shape[2])

        # Then we expand the labels, for the same reason.
        ex_labels = masked_word_labels.unsqueeze(1).expand(
            -1, word_probs.shape[0], -1)
        ex_labels = ex_labels.reshape(ex_labels.shape[0] * ex_labels.shape[1],
                                      ex_labels.shape[2])

        # We gather the relevant probabilities.
        log_p_x = torch.gather(ex_word_probs, 1, ex_labels)

        # We reshape log(P(X)).
        log_p_x = log_p_x.reshape(int(log_p_x.shape[0] / word_probs.shape[0]),
                                  word_probs.shape[0], log_p_x.shape[1])
        # We compute the log probability of each sentence, per relation.
        summed_log_p_x = log_p_x.sum(dim=-1)
        # We compute the per instance final value of log(P(x)), by employing the log-sum-exp trick.
        log_p_x = torch.logsumexp(summed_log_p_x, dim=-1) - torch.log(
            torch.tensor(word_probs.shape[0]).float())

        # Compute whatever reduction is necessary.
        if (reduction == 'mean' or reduction == 'sum'):
            batch_log_p_x = torch.sum(log_p_x)
            batch_kls = torch.sum(kls)
            if (reduction == 'mean'):
                batch_log_p_x /= log_p_x.shape[0]
                batch_kls /= kls.shape[0]

    # Compute the instance-wise loss.
    instance_wise_loss = expected_sequence_log_prob - kl_annealing * kls

    # Compute whatever reduction is necessary.
    if (reduction == 'mean' or reduction == 'sum'):
        batch_log_prob = torch.sum(instance_wise_loss)
        if (reduction == 'mean'):
            batch_log_prob /= instance_wise_loss.shape[0]
        if (_DEBUG):
            return (-batch_log_prob, batch_kls, -batch_log_p_x)
        else:
            return (-batch_log_prob, )

    if (_DEBUG):
        return (-instance_wise_loss, kls, -log_p_x)
    else:
        return (-instance_wise_loss, )
Code example #18
def bag_of_words_log_loss(input,
                          target,
                          weight=None,
                          size_average=None,
                          ignore_index=0,
                          reduce=None,
                          reduction='mean'):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[bool], int, Optional[bool], str) -> Tensor
    r"""This criterion combines `log` and word-wise (at sentence level) likelihood in a single
    function.

    See :class:`~torch.nn.CrossEntropyLoss` for details.

    # TODO: This needs to be re-written.
    Args:
        input (Tensor) : :math:`(N, C)` where `C = number of classes` or :math:`(N, C, H, W)`
            in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)` where :math:`K > 1`
            in the case of K-dimensional loss.
        target (Tensor) : :math:`(N)` where each value is :math:`0 \leq \text{targets}[i] \leq C-1`,
            or :math:`(N, d_1, d_2, ..., d_K)` where :math:`K \geq 1` for
            K-dimensional loss.
        weight (Tensor, optional): a manual rescaling weight given to each
            class. If given, has to be a Tensor of size `C`
        size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
            the losses are averaged over each loss element in the batch. Note that for
            some losses, there are multiple elements per sample. If the field :attr:`size_average`
            is set to ``False``, the losses are instead summed for each minibatch. Ignored
            when reduce is ``False``. Default: ``True``
        ignore_index (int, optional): Specifies a target value that is ignored
            and does not contribute to the input gradient. When :attr:`size_average` is
            ``True``, the loss is averaged over non-ignored targets. Default: -100
        reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
            losses are averaged or summed over observations for each minibatch depending
            on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
            batch element instead and ignores :attr:`size_average`. Default: ``True``
        reduction (string, optional): Specifies the reduction to apply to the output:
            'none' | 'mean' | 'sum'. 'none': no reduction will be applied,
            'mean': the sum of the output will be divided by the number of
            elements in the output, 'sum': the output will be summed. Note: :attr:`size_average`
            and :attr:`reduce` are in the process of being deprecated, and in the meantime,
            specifying either of those two args will override :attr:`reduction`. Default: 'mean'

    Examples::

        >>> input = torch.randn(3, 5, requires_grad=True)
        >>> target = torch.randint(5, (3,), dtype=torch.int64)
        >>> loss = bag_of_words_log_loss(input, target)
        >>> loss.backward()
    """

    if size_average is not None or reduce is not None:
        reduction = _Reduction.legacy_get_string(size_average, reduce)

    word_probs = input[0]
    word_labels = target[0]

    # Creates a masked target tensor, where it is assumed that the labels indices will start at 1.
    mask = (word_labels != ignore_index).long()
    masked_word_labels = word_labels * mask

    # Adds a column of ones to the beginning of the input. This will allow to batch the computation ignoring the
    # 'ignore_idx'.
    input_accounting_for_masking = torch.cat(
        (torch.ones(word_probs.shape[0], 1, device=mask.device), word_probs),
        dim=1)

    # Gets the probabilities of the words in each sentence (including the ignore_index ones, but those won't contribute
    # to the result).
    word_log_probs = torch.log(
        torch.gather(input_accounting_for_masking, 1, masked_word_labels))

    # Computes the probability of each sequence.
    sequence_log_prob = torch.sum(word_log_probs, dim=1)

    # Compute whatever reduction is necessary.
    if (reduction == 'mean' or reduction == 'sum'):
        batch_log_prob = torch.sum(sequence_log_prob)
        if (reduction == 'mean'):
            batch_log_prob /= sequence_log_prob.shape[0]
        return -batch_log_prob

    return -sequence_log_prob
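A usage sketch, inferred from the `input[0]` / `target[0]` indexing and the masking logic: the function appears to expect per-sentence word distributions and label-id sequences whose vocabulary ids start at 1, with `ignore_index=0` marking padding. The shapes and values below are hypothetical:

import torch

word_probs = torch.softmax(torch.randn(4, 100), dim=-1)   # (N, V) per-sentence word distributions
word_labels = torch.randint(1, 101, (4, 12))               # (N, L) word ids in [1, V]
word_labels[:, -3:] = 0                                     # 0 marks padded positions (ignore_index)

loss = bag_of_words_log_loss((word_probs,), (word_labels,))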