Example #1
 def _mixup_forward(self, F, pred, label1, label2, lam, sample_weight=None):
     if not self._from_logits:
         pred = F.log_softmax(pred, self._axis)
     if self._sparse_label:
         loss1 = -F.pick(pred, label1, axis=self._axis, keepdims=True)
         loss2 = -F.pick(pred, label2, axis=self._axis, keepdims=True)
         loss = lam * loss1 + (1 - lam) * loss2
     else:
         label1 = _reshape_like(F, label1, pred)
         label2 = _reshape_like(F, label2, pred)
         loss1 = -F.sum(pred*label1, axis=self._axis, keepdims=True)
         loss2 = -F.sum(pred*label2, axis=self._axis, keepdims=True)
         loss = lam * loss1 + (1 - lam) * loss2
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
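A minimal sketch (not part of the example above) of the mixup combination it computes: each term is an ordinary sparse-label cross entropy, blended with weight lam. The stock gluon SoftmaxCrossEntropyLoss stands in for the sparse branch of _mixup_forward; names, shapes and values are illustrative.

    import mxnet as mx
    from mxnet import nd, gluon

    ce = gluon.loss.SoftmaxCrossEntropyLoss()
    pred = nd.random.normal(shape=(4, 10))        # logits
    label1 = nd.array([1, 2, 3, 4])               # original labels
    label2 = nd.array([5, 6, 7, 8])               # labels of the mixed-in samples
    lam = 0.7                                     # mixup coefficient
    mixed = lam * ce(pred, label1) + (1 - lam) * ce(pred, label2)
    print(mixed)                                  # per-sample mixup loss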
Example #2
 def _mixup_forward(self, F, pred, label1, label2, lam, sample_weight=None):
     if not self._from_logits:
         pred = F.log_softmax(pred, self._axis)
     if self._sparse_label:
         loss1 = -F.pick(pred, label1, axis=self._axis, keepdims=True)
         loss2 = -F.pick(pred, label2, axis=self._axis, keepdims=True)
         loss = lam * loss1 + (1 - lam) * loss2
     else:
         label1 = _reshape_like(F, label1, pred)
         label2 = _reshape_like(F, label2, pred)
         loss1 = -F.sum(pred * label1, axis=self._axis, keepdims=True)
         loss2 = -F.sum(pred * label2, axis=self._axis, keepdims=True)
         loss = lam * loss1 + (1 - lam) * loss2
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #3
    def hybrid_forward(self, F, pred, label, sample_weight=None):
        # extract the index of the last ('END') token in each label sentence
        label = F.cast(label, dtype='float32')
        label_sent_length = F.argmax(F.where(label == self.end_idx,
                                             F.ones_like(label),
                                             F.zeros_like(label)),
                                     axis=1)

        if not self._from_logits:
            pred = F.log_softmax(pred, self._axis)
        if self._sparse_label:
            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
        else:
            label = _reshape_like(F, label, pred)
            loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        #(N, 30, val)
        # mask the loss to 0 for positions beyond each sentence length
        loss = F.transpose(loss, (1, 0, 2))
        loss = F.SequenceMask(loss,
                              sequence_length=label_sent_length + 1,
                              use_sequence_length=True)
        loss = F.transpose(loss, (1, 0, 2))
        return F.sum(loss, axis=self._batch_axis,
                     exclude=True) / (label_sent_length + 1)
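The transpose–mask–transpose sequence above is needed because F.SequenceMask masks along the first (sequence) axis by default. A small standalone sketch of that step, with made-up shapes:

    import mxnet as mx
    from mxnet import nd

    loss = nd.ones((2, 5, 1))                     # (batch, seq_len, 1)
    lengths = nd.array([2, 4])                    # valid steps per sample
    masked = nd.SequenceMask(nd.transpose(loss, (1, 0, 2)),
                             sequence_length=lengths,
                             use_sequence_length=True)
    masked = nd.transpose(masked, (1, 0, 2))
    print(masked[:, :, 0])                        # entries beyond each length are zeroed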
Example #4
 def hybrid_forward(self,
                    F,
                    pred,
                    label,
                    sample_weight=None,
                    pos_weight=None):
     label = _reshape_like(F, label, pred)
     if not self._from_sigmoid:
         if pos_weight is None:
             # We use the stable formula: max(x, 0) - x * z + log(1 + exp(-abs(x)))
             loss = F.relu(pred) - pred * label + F.Activation(
                 -F.abs(pred), act_type='softrelu')
         else:
             # We use the stable formula: x - x * z + (1 + z * pos_weight - z) * \
             #    (log(1 + exp(-abs(x))) + max(-x, 0))
             log_weight = 1 + F.broadcast_mul(pos_weight - 1, label)
             loss = pred - pred * label + log_weight * (F.Activation(
                 -F.abs(pred), act_type='softrelu') + F.relu(-pred))
     else:
         eps = 1e-12
         if pos_weight is None:
             loss = -(F.log(pred + eps) * label + F.log(1. - pred + eps) *
                      (1. - label))
         else:
             loss = -(
                 F.broadcast_mul(F.log(pred + eps) * label, pos_weight) +
                 F.log(1. - pred + eps) * (1. - label))
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
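As an illustration (not part of the class above), the numerically stable form max(x, 0) - x*z + log(1 + exp(-|x|)) used in the non-sigmoid branch can be checked against the naive binary cross entropy:

    import mxnet as mx
    from mxnet import nd

    x = nd.array([-3.0, -0.5, 0.0, 2.0])          # logits
    z = nd.array([1.0, 0.0, 1.0, 0.0])            # binary labels
    stable = nd.relu(x) - x * z + nd.log(1 + nd.exp(-nd.abs(x)))
    naive = -(z * nd.log(nd.sigmoid(x)) + (1 - z) * nd.log(1 - nd.sigmoid(x)))
    print(stable, naive)                          # the two agree up to float error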
Example #5
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [
            _as_list(x) for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1:
            # no positive samples found, return dummy losses
            return nd.zeros((1,)), nd.zeros((1,)), nd.zeros((1,))

        # compute element-wise cross entropy loss and sort, then perform
        # negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < (
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where(
                (pos + hard_negative) > 0,
                cls_loss,
                nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(
                    cls_loss,
                    axis=0,
                    exclude=True) /
                num_pos_all)

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(
                box_loss > self._rho,
                box_loss - 0.5 * self._rho,
                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(
                    box_loss,
                    axis=0,
                    exclude=True) /
                num_pos_all)
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
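The rank computation above relies on a double argsort: multiplying the loss by (pos - 1) zeroes out positive anchors and negates the losses of negative anchors, so two ascending argsorts give each negative anchor its rank by descending loss. A tiny sketch with illustrative values:

    import mxnet as mx
    from mxnet import nd

    cls_loss = nd.array([[0.2, 0.9, 0.1, 0.7]])   # per-anchor CE loss
    pos = nd.array([[1, 0, 0, 0]])                # first anchor is positive
    rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
    print(rank)                                   # [[3. 0. 2. 1.]] -> hardest negatives get the lowest ranks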
Example #6
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     """Forward"""
     pred = F.log(pred)
     if self._sparse_label:
         loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
     else:
         label = _reshape_like(F, label, pred)
         loss = -F.sum(pred*label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #7
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        pos_ct = [ct > 0 for ct in cls_target]
        num_pos = [ct.sum() for ct in pos_ct]
        num_pos_all = sum([p.asscalar() for p in num_pos])
        # print ('num_pos_all: {}'.format(num_pos_all))
        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # no positive samples and no hard negatives, return dummy losses
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [
                nd.sum(cp * 0) + nd.sum(bp * 0)
                for cp, bp in zip(cls_pred, box_pred)
            ]
            return sum_losses, cls_losses, box_losses

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            # print ('cp shape: {}'.format(cp.shape))
            # print ('bp shape: {}'.format(bp.shape))
            # print ('ct shape: {}'.format(ct.shape))
            # print ('bt shape: {}'.format(bt.shape))
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < nd.maximum(
                self._min_hard_negatives,
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss,
                                nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho,
                                box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #8
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     if not self._from_logits:
         pred = F.log_softmax(pred, self._axis)
     if self._sparse_label:
         loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
     else:
         label = _reshape_like(F, label, pred)
         loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     diceloss = self.dice_loss(F, pred, label)
     return F.mean(loss, axis=self._batch_axis, exclude=True) + diceloss
Example #9
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        # synchronize across different machines
        # print('before sync:', num_pos_all)
        if self._distributed:
            num_pos_out = nd.zeros(1, mx.cpu())
            num_pos_in = nd.zeros(1, mx.cpu()) + num_pos_all
            # allreduce only supports pushpull
            if 'allreduce' in self._kv_store_type:
                self._kv_store.pushpull(self._num_pos_key, num_pos_in, num_pos_out)
            else:
                self._kv_store.push(self._num_pos_key, num_pos_in)
                # self._kv_store._barrier()
                self._kv_store.pull(self._num_pos_key, out=num_pos_out)
            num_pos_all = num_pos_out.asscalar()
        # print('after sync:', num_pos_all)
        if num_pos_all < 1:
            # no positive samples found, return dummy losses
            return nd.zeros((1,)), nd.zeros((1,)), nd.zeros((1,))

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < (pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #10
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     label = _reshape_like(F, label, pred)
     if not self._from_sigmoid:
         max_val = F.relu(-pred)
         loss = pred - pred * label + max_val + F.log(F.exp(-max_val) + F.exp(-pred - max_val))
     else:
          # 'ctx' and 'batch_ratios' come from the enclosing training script, not shown here
          p = mx.nd.array(1 / (1 + nd.exp(-pred)), ctx=ctx)
          weights = nd.exp(label + (1 - label * 2) * batch_ratios)
         gamma = 2
         w_p, w_n = nd.power(1. - p, gamma), nd.power(p, gamma)
         loss = - (w_p * F.log(p + 1e-12) * label + w_n * F.log(1. - p + 1e-12) * (1. - label))
         loss *= weights
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #11
 def hybrid_forward(self, F, pred, label):
     """Compute loss"""
     softmaxout = F.SoftmaxOutput(
         pred, label.astype(pred.dtype), ignore_label=self._ignore_label,
         multi_output=self._sparse_label,
         use_ignore=True, normalization='valid' if self._size_average else 'null')
     if self._sparse_label:
         loss = -F.pick(F.log(softmaxout), label, axis=1, keepdims=True)
     else:
         label = _reshape_like(F, label, pred)
         loss = -F.sum(F.log(softmaxout) * label, axis=-1, keepdims=True)
     loss = F.where(label.expand_dims(axis=1) == self._ignore_label,
                    F.zeros_like(loss), loss)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #12
    def hybrid_forward(self, F, pred, label):
        label = _reshape_like(F, label, pred)
        sample_weight = label != self._ignore_label
        label = F.where(sample_weight, label, F.zeros_like(label))

        if not self._from_sigmoid:
            loss = F.relu(pred) - pred * label + \
                F.Activation(-F.abs(pred), act_type='softrelu')
        else:
            eps = 1e-12
            loss = -(F.log(pred + eps) * label + F.log(1. - pred + eps) *
                     (1. - label))

        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #13
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     label = _reshape_like(F, label, pred)
     if not self._from_sigmoid:
         max_val = F.relu(-pred)
         loss = pred - pred * label + max_val + F.log(F.exp(-max_val) + F.exp(-pred - max_val))
     else:
          # 'ctx', 'epoch', 'history_track', 'args', 'prediction_history' and
          # 'batch_id' come from the enclosing training script, not shown here
          p = mx.nd.array(1 / (1 + nd.exp(-pred)), ctx=ctx)
          if epoch >= history_track and not args.test:
             p_hist = prediction_history[:, batch_id * args.batch_size: (batch_id + 1) * args.batch_size, :]
             p_std = (np.var(p_hist, axis=0) + (np.var(p_hist, axis=0)**2)/(p_hist.shape[0] - 1))**.5
             std_weights = nd.array(1 + p_std, ctx=ctx)
             loss = - std_weights * (F.log(p + 1e-12) * label + F.log(1. - p + 1e-12) * (1. - label))
         else:
             loss = - (F.log(p + 1e-12) * label + F.log(1. - p + 1e-12) * (1. - label))
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #14
    def hybrid_forward(self, F, orign_arr, dest_arr, sample_weight=None):
        '''
        Forward propagation, computing L2 norm.

        Args:
            F:           `mxnet.ndarray` or `mxnet.symbol`.
            orign_arr:   `mxnet.ndarray` or `mxnet.symbol` of origins.
            dest_arr:    `mxnet.ndarray` or `mxnet.symbol` of destinations.
        
        Returns:
            `mxnet.ndarray` or `mxnet.symbol` of loss.
        '''
        dest_arr = _reshape_like(F, dest_arr, orign_arr)
        loss = F.sqrt(F.mean(F.square(orign_arr - dest_arr), axis=1))
        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
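A minimal numeric sketch of the per-sample root-mean-square error computed above (array values are made up; the class wrapper is omitted):

    import mxnet as mx
    from mxnet import nd

    orign_arr = nd.array([[1.0, 2.0], [3.0, 4.0]])
    dest_arr = nd.array([[1.5, 2.0], [2.0, 4.0]])
    rmse = nd.sqrt(nd.mean(nd.square(orign_arr - dest_arr), axis=1))
    print(rmse)                                   # root-mean-square error per row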
Example #15
    def hybrid_forward(self, F, images, cls_target, box_target, r, idx_r):
        if self.bulk_last_wgrad:
            # make the last wgrad use the copy of the input
            # so it joins the bulk
            images = F.identity(images)
        cls_pred, box_pred = self.net(images)

        # loss needs to be done in FP32
        cls_pred = cls_pred.astype(dtype='float32')
        box_pred = box_pred.astype(dtype='float32')

        pred = F.log_softmax(cls_pred, axis=-1)
        pos = cls_target > 0
        pos_num = pos.sum(axis=1)

        cls_loss = -F.pick(pred, cls_target, axis=-1, keepdims=False)
        idx = (cls_loss * (pos - 1)).argsort(axis=1)
        # use scatter_nd to save one argsort
        idx_c = idx.reshape((1, -1)).squeeze(axis=0)  # column indices
        idx = F.stack(idx_r, idx_c)
        rank = F.scatter_nd(r, idx, (self.s0, self.s1))
        hard_negative = F.broadcast_lesser(
            rank,
            F.maximum(self._min_hard_negatives,
                      pos.sum(axis=1) *
                      self._negative_mining_ratio).expand_dims(-1))
        # mask out if not positive or negative
        cls_loss = F.where((pos + hard_negative) > 0, cls_loss,
                           F.zeros_like(cls_loss))
        cls_loss = F.sum(cls_loss, axis=0, exclude=True)

        box_pred = _reshape_like(F, box_pred, box_target)
        box_loss = F.abs(box_pred - box_target)
        box_loss = F.smooth_l1(data=box_loss, scalar=1.0)
        # box loss only apply to positive samples
        box_loss = F.broadcast_mul(box_loss, pos.expand_dims(axis=-1))
        box_loss = F.sum(box_loss, axis=0, exclude=True)

        # normalize loss with num_pos_per_image
        # see https://github.com/mlperf/training/blob/master/single_stage_detector/ssd/base_model.py#L201-L204
        num_mask = (pos_num > 0).astype('float32')
        pos_num = pos_num.astype('float32').clip(a_min=1e-6, a_max=8732)
        sum_loss = (num_mask * (cls_loss + self._lambd * box_loss) /
                    pos_num).mean(axis=0)

        return sum_loss
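The scatter_nd call above is an optimisation that avoids the second argsort used in the earlier examples: r and idx_r are precomputed index tensors passed into hybrid_forward, and the per-position ranks carried in r are scattered back to the anchor positions produced by the first argsort. A standalone sketch of the idea, with illustrative shapes and values:

    import mxnet as mx
    from mxnet import nd

    cls_loss = nd.array([[0.2, 0.9, 0.1, 0.7]])
    idx_c = cls_loss.argsort(axis=1, is_ascend=False).reshape(-1)  # anchors by descending loss
    idx_r = nd.zeros((4,))                        # row index of every element
    r = nd.array([0, 1, 2, 3])                    # rank of each sorted position
    rank = nd.scatter_nd(r, nd.stack(idx_r, idx_c), (1, 4))
    print(rank)                                   # [[2. 0. 3. 1.]], same result as argsort().argsort()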
Example #16
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # no positive samples and no hard negatives, return dummy losses
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0) for cp, bp in zip(cls_pred, box_pred)]
            return sum_losses, cls_losses, box_losses


        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < nd.maximum(self._min_hard_negatives, pos.sum(axis=1)
                                              * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #17
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     """Loss forward"""
     if not self._from_logits:
         pred = F.sigmoid(pred)
     if self._sparse_label:
         one_hot = F.one_hot(label, self._num_class)
         one_hot = _reshape_like(F, one_hot, pred)
     else:
         one_hot = label > 0
     pt = F.where(one_hot, pred, 1 - pred)
     t = F.ones_like(one_hot)
     alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
     loss = -alpha * (
         (1 - pt)**self._gamma) * F.log(F.minimum(pt + self._eps, 1))
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     if self._size_average:
         return F.mean(loss, axis=self._batch_axis, exclude=True)
     else:
         return F.sum(loss, axis=self._batch_axis, exclude=True)
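As an illustration of the focal term above, here is the same alpha/gamma weighting computed directly on a tiny batch with nd (values and hyper-parameters are made up):

    import mxnet as mx
    from mxnet import nd

    pred = nd.sigmoid(nd.array([[2.0, -1.0, 0.5]]))   # predicted probabilities
    one_hot = nd.array([[1, 0, 0]])                   # binary targets
    alpha, gamma, eps = 0.25, 2.0, 1e-12
    pt = nd.where(one_hot, pred, 1 - pred)
    t = nd.ones_like(one_hot)
    a = nd.where(one_hot, alpha * t, (1 - alpha) * t)
    loss = -a * ((1 - pt) ** gamma) * nd.log(nd.minimum(pt + eps, 1))
    print(loss)                                       # easy examples are down-weighted by (1 - pt)^gamma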
Example #18
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     """Compute loss"""
     if not self._from_logits:
         pred = F.log_softmax(pred, axis=self._axis)
     if self._sparse_label:
         if self._size_average:
             valid_label_map = (label != self._ignore_label).astype('float32')
         loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
         loss = F.where(label.expand_dims(axis=self._axis) == self._ignore_label,
                        F.zeros_like(loss), loss)
     else:
         label = _reshape_like(F, label, pred)
         loss = -F.sum(pred*label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
      if self._size_average and self._sparse_label:
          # rescale so that ignored labels do not dilute the mean
          return F.mean(loss, axis=self._batch_axis, exclude=True) * \
              valid_label_map.size / F.sum(valid_label_map)
     else:
         return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #19
 def hybrid_forward(self, F, pred, label, mask, sample_weight=None):
     label = _reshape_like(F, label, pred)
     loss = F.abs(label * mask - pred * mask)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     norm = F.sum(mask).clip(1, 1e30)
     return F.sum(loss) / norm
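Usage sketch for the masked L1 loss above (names and values are illustrative): only positions where mask is 1 contribute, and the sum is normalised by the number of unmasked elements.

    import mxnet as mx
    from mxnet import nd

    pred = nd.array([[1.0, 2.0], [3.0, 4.0]])
    label = nd.array([[1.5, 2.0], [0.0, 0.0]])
    mask = nd.array([[1.0, 1.0], [0.0, 0.0]])     # second sample is ignored
    loss = nd.abs(label * mask - pred * mask)
    print(nd.sum(loss) / nd.sum(mask).clip(1, 1e30))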
Example #20
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices.
        
        Parameters
        ----------
        cls_pred : mxnet.nd.NDArray
        Predicted classes.
        box_pred : mxnet.nd.NDArray
        Predicted bounding-boxes.
        cls_target : mxnet.nd.NDArray
        Ground-truth classes.
        box_target : mxnet.nd.NDArray
        Ground-truth bounding-boxes.
        
        Returns
        -------
        tuple of NDArrays
            sum_losses : array with containing the sum of class prediction and bounding-box regression loss.
            cls_losses : array of class prediction loss.
            box_losses : array of box regression L1 loss.
        
        """
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # no positive samples and no hard negatives, return dummy losses
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [
                nd.sum(cp * 0) + nd.sum(bp * 0)
                for cp, bp in zip(cls_pred, box_pred)
            ]
            return sum_losses, cls_losses, box_losses

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < nd.maximum(
                self._min_hard_negatives,
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss,
                                nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho,
                                box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
Example #21
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     label = _reshape_like(F, label, pred)
     loss = F.square(pred - label)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #22
    def hybrid_forward(self,
                       F,
                       pretext_pred_arr,
                       pred_arr,
                       pretext_label_arr,
                       label_arr,
                       sample_weight=None):
        '''
        Forward propagation, computing losses.

        Args:
            F:                      `mxnet.ndarray` or `mxnet.symbol`.
            pretext_pred_arr:       `mxnet.ndarray` or `mxnet.symbol` of predicted data in pretext, or target domain.
            pred_arr:               `mxnet.ndarray` or `mxnet.symbol` of inferred labeled feature points in source domain.
            pretext_label_arr:      `mxnet.ndarray` or `mxnet.symbol` of label data in pretext.
            label_arr:              `mxnet.ndarray` or `mxnet.symbol` of label data in source domain.

            sample_weight:          element-wise weighting tensor. 
                                    Must be broadcastable to the same shape as label. 
                                    For example, if label has shape (64, 10) and you want to weigh 
                                    each sample in the batch separately, sample_weight should have shape (64, 1).

        Returns:
            `mxnet.ndarray` or `mxnet.symbol` of loss.
        '''
        if not self._from_logits:
            if self.__log_softmax_flag is True:
                pred_arr = F.log_softmax(pred_arr, self._axis)
            else:
                pred_arr = pred_arr - F.reshape(
                    F.max(pred_arr, axis=self._axis), shape=(-1, 1))
                pred_arr = F.exp(pred_arr)
                pred_arr = pred_arr / F.reshape(
                    F.sum(pred_arr, axis=self._axis), shape=(-1, 1))

        if self._sparse_label:
            classification_loss_arr = -F.pick(
                pred_arr, label_arr, axis=self._axis, keepdims=True)
        else:
            label_arr = _reshape_like(F, label_arr, pred_arr)
            classification_loss_arr = -F.sum(
                pred_arr * label_arr, axis=self._axis, keepdims=True)

        if self.__grad_clip_threshold > 0:
            classification_loss_norm = F.norm(classification_loss_arr)
            if classification_loss_norm.asscalar(
            ) > self.__grad_clip_threshold:
                classification_loss_arr = classification_loss_arr * self.__grad_clip_threshold / classification_loss_norm

        pretext_label_arr = _reshape_like(F, pretext_label_arr,
                                          pretext_pred_arr)
        pretext_loss_arr = -F.sum(pretext_pred_arr * pretext_label_arr,
                                  axis=self._axis,
                                  keepdims=True) / 4

        if self.__grad_clip_threshold > 0:
            pretext_loss_norm = F.norm(pretext_loss_arr)
            if pretext_loss_norm.asscalar() > self.__grad_clip_threshold:
                pretext_loss_arr = pretext_loss_arr * self.__grad_clip_threshold / pretext_loss_norm

        if self.__classification_weight is None:
            classification_loss_arr = _apply_weighting(
                F, classification_loss_arr, self._weight, sample_weight)
        else:
            classification_loss_arr = _apply_weighting(
                F, classification_loss_arr, self.__classification_weight,
                sample_weight)

        if self.__pretext_weight is None:
            pretext_loss_arr = _apply_weighting(F, pretext_loss_arr,
                                                self._weight, sample_weight)
        else:
            pretext_loss_arr = _apply_weighting(F, pretext_loss_arr,
                                                self.__pretext_weight,
                                                sample_weight)

        classification_loss = F.mean(classification_loss_arr,
                                     axis=self._batch_axis,
                                     exclude=True)
        pretext_loss = F.mean(pretext_loss_arr,
                              axis=self._batch_axis,
                              exclude=True)

        total_loss = classification_loss + pretext_loss
        return total_loss, classification_loss, pretext_loss
Example #23
    def hybrid_forward(self,
                       F,
                       decoded_arr,
                       pred_arr,
                       observed_arr,
                       label_arr,
                       sample_weight=None):
        '''
        Forward propagation, computing losses.

        Args:
            F:                      `mxnet.ndarray` or `mxnet.symbol`.
            decoded_arr:            `mxnet.ndarray` or `mxnet.symbol` of decoded feature points.
            pred_arr:               `mxnet.ndarray` or `mxnet.symbol` of inferred labeled feature points.
            observed_arr:           `mxnet.ndarray` or `mxnet.symbol` of observed data points.
            label_arr:              `mxnet.ndarray` or `mxnet.symbol` of label data.
            sample_weight:          element-wise weighting tensor. 
                                    Must be broadcastable to the same shape as label. 
                                    For example, if label has shape (64, 10) and you want to weigh 
                                    each sample in the batch separately, sample_weight should have shape (64, 1).

        Returns:
            `mxnet.ndarray` or `mxnet.symbol` of loss.
        '''
        if not self._from_logits:
            if self.__log_softmax_flag is True:
                pred_arr = F.log_softmax(pred_arr, self._axis)
            else:
                pred_arr = pred_arr - F.reshape(
                    F.max(pred_arr, axis=self._axis), shape=(-1, 1))
                pred_arr = F.exp(pred_arr)
                pred_arr = pred_arr / F.reshape(
                    F.sum(pred_arr, axis=self._axis), shape=(-1, 1))

        if self._sparse_label:
            classification_loss_arr = -F.pick(
                pred_arr, label_arr, axis=self._axis, keepdims=True)
        else:
            label_arr = _reshape_like(F, label_arr, pred_arr)
            classification_loss_arr = -F.sum(
                pred_arr * label_arr, axis=self._axis, keepdims=True)

        if self.__grad_clip_threshold > 0:
            classification_loss_norm = F.norm(classification_loss_arr)
            if classification_loss_norm.asscalar(
            ) > self.__grad_clip_threshold:
                classification_loss_arr = classification_loss_arr * self.__grad_clip_threshold / classification_loss_norm

        if self.__classification_weight is None:
            classification_loss_arr = _apply_weighting(
                F, classification_loss_arr, self._weight, sample_weight)
        else:
            classification_loss_arr = _apply_weighting(
                F, classification_loss_arr, self.__classification_weight,
                sample_weight)

        classification_loss_arr = _apply_weighting(F, classification_loss_arr,
                                                   self.__rc_lambda,
                                                   sample_weight)
        classification_loss = F.mean(classification_loss_arr,
                                     axis=self._batch_axis,
                                     exclude=True)

        observed_arr = _reshape_like(F, observed_arr, decoded_arr)
        reconstruction_loss_arr = F.square(observed_arr - decoded_arr)

        if self.__grad_clip_threshold > 0:
            reconstruction_loss_norm = F.norm(reconstruction_loss_arr)
            if reconstruction_loss_norm.asscalar(
            ) > self.__grad_clip_threshold:
                reconstruction_loss_arr = reconstruction_loss_arr * self.__grad_clip_threshold / reconstruction_loss_norm

        if self.__reconstruction_weight is None:
            reconstruction_loss_arr = _apply_weighting(
                F, reconstruction_loss_arr, self._weight / 2, sample_weight)
        else:
            reconstruction_loss_arr = _apply_weighting(
                F, reconstruction_loss_arr, self.__reconstruction_weight / 2,
                sample_weight)

        reconstruction_loss_arr = _apply_weighting(F, reconstruction_loss_arr,
                                                   (1 - self.__rc_lambda),
                                                   sample_weight)
        reconstruction_loss = F.mean(reconstruction_loss_arr,
                                     axis=self._batch_axis,
                                     exclude=True)

        return classification_loss + reconstruction_loss, classification_loss, reconstruction_loss
Example #24
    def hybrid_forward(self, F, pred, label, sample_weight=None):
        """Compute YOLOv3 losses.

        :param pred:    (B, N, 4)
        :param label:   (B, N, 4)
        :param sample_weight:
        :return:
        """
        label = F.stop_gradient(label)
        label = gloss._reshape_like(F, label, pred)
        # pred = pred.reshape(-1, 4).T
        # label = label.reshape(-1, 4).T
        # pred = F.transpose(pred)
        # label = F.transpose(label)
        if self.x1y1x2y2:
            b1_xmin, b1_ymin, b1_xmax, b1_ymax = F.split(pred,
                                                         axis=-1,
                                                         num_outputs=4)
            b2_xmin, b2_ymin, b2_xmax, b2_ymax = F.split(label,
                                                         axis=-1,
                                                         num_outputs=4)
        else:
            b1_xmin, b1_ymin, b1_xmax, b1_ymax = self._center2corner(pred)
            b2_xmin, b2_ymin, b2_xmax, b2_ymax = self._center2corner(label)

        # Intersection area
        MAX = 1e5
        inter_w = F.clip(
            F.elemwise_sub(F.minimum(b1_xmax, b2_xmax),
                           F.maximum(b1_xmin, b2_xmin)), 0, MAX)
        inter_h = F.clip(
            F.elemwise_sub(F.minimum(b1_ymax, b2_ymax),
                           F.maximum(b1_ymin, b2_ymin)), 0, MAX)
        # inter_w = F.where(inter_w < 0., F.zeros_like(inter_w), inter_w)
        # inter_h = F.where(inter_h < 0., F.zeros_like(inter_h), inter_h)
        inter = F.elemwise_mul(inter_w, inter_h)

        # Union Area
        w1, h1 = F.elemwise_sub(b1_xmax,
                                b1_xmin), F.elemwise_sub(b1_ymax, b1_ymin)
        w2, h2 = F.elemwise_sub(b2_xmax,
                                b2_xmin), F.elemwise_sub(b2_ymax, b2_ymin)
        # w1 = F.where(w1 < 0., F.zeros_like(w1), w1)
        # h1 = F.where(h1 < 0., F.zeros_like(h1), h1)
        # w2 = F.where(w2 < 0., F.zeros_like(w2), w2)
        # h2 = F.where(h2 < 0., F.zeros_like(h2), h2)
        union = F.elemwise_mul(w1, h1) + F.elemwise_mul(w2, h2) - inter

        iou = F.elemwise_div(inter, union + 1e-16)  # iou

        # From: https://github.com/ultralytics/yolov3
        # GIOU
        cw = F.elemwise_sub(
            F.maximum(b1_xmax, b2_xmax),
            F.minimum(b1_xmin,
                      b2_xmin))  # convex (smallest enclosing box) width
        ch = F.elemwise_sub(F.maximum(b1_ymax, b2_ymax),
                            F.minimum(b1_ymin, b2_ymin))  # convex height
        # cw = F.where(cw < 0., F.zeros_like(cw), cw)
        # ch = F.where(ch < 0., F.zeros_like(ch), ch)
        if self.loss_type == 'giou':
            c_area = F.elemwise_mul(cw, ch) + 1e-16  # convex area
            giou = iou - (c_area - union) / c_area  # GIoU
            loss = 1. - giou
        else:
            # convex diagonal squared
            c2 = cw**2 + ch**2 + 1e-16
            # centerpoint distance squared
            rho2 = F.square((b2_xmin + b2_xmax) -
                            (b1_xmin + b1_xmax)) / 4 + F.square(
                                ((b2_ymin + b2_ymax) -
                                 (b1_ymin + b1_ymax))) / 4
            if self.loss_type == 'diou':
                diou = iou - rho2 / c2
                loss = 1. - diou
            elif self.loss_type == 'ciou':
                v = (4 / mx.np.pi**2) * F.power(
                    F.arctan(w2 / (h2 + 1e-16)) - F.arctan(w1 /
                                                           (h1 + 1e-16)), 2)
                # TODO without pause(), coverage will be faster
                with mx.autograd.pause():
                    alpha = v / (1. - iou + v + 1e-16)
                    alpha = F.stop_gradient(alpha)
                ciou = iou - (rho2 / c2 + v * alpha)
                loss = 1. - ciou
            else:
                raise ValueError(
                    f'unknown loss_type: {self.loss_type}, available: giou, diou, ciou'
                )
        loss = gloss._apply_weighting(F, loss, self._weight, sample_weight)
        if gloss.is_np_array():
            if F is mx.ndarray:
                return F.np.mean(loss, axis=tuple(range(1, loss.ndim)))
            else:
                return F.npx.batch_flatten(loss).mean(axis=1)
        else:
            return F.mean(loss, axis=self._batch_axis, exclude=True)
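For reference, a plain-Python check of the IoU/GIoU computed above for two overlapping boxes in (xmin, ymin, xmax, ymax) form (an illustration only):

    b1 = (0.0, 0.0, 2.0, 2.0)
    b2 = (1.0, 1.0, 3.0, 3.0)
    inter = max(0.0, min(b1[2], b2[2]) - max(b1[0], b2[0])) * \
            max(0.0, min(b1[3], b2[3]) - max(b1[1], b2[1]))
    union = (b1[2] - b1[0]) * (b1[3] - b1[1]) + (b2[2] - b2[0]) * (b2[3] - b2[1]) - inter
    iou = inter / union                           # ~0.143
    cw = max(b1[2], b2[2]) - min(b1[0], b2[0])    # enclosing-box width
    ch = max(b1[3], b2[3]) - min(b1[1], b2[1])    # enclosing-box height
    giou = iou - (cw * ch - union) / (cw * ch)    # ~-0.079
    print(iou, giou, 1.0 - giou)                  # GIoU loss ~1.079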