Example #1
    def train(self, x, y, actions=None):
        actions = actions.astype(np.int32)
        batch_size = len(actions)

        if self._gpu_device:
            x = cuda.to_gpu(x, self._gpu_device)
            y = cuda.to_gpu(y, self._gpu_device)
            actions = cuda.to_gpu(actions, self._gpu_device)

        q = self._model(x)
        q_subset = F.reshape(F.select_item(q, actions), (batch_size, 1))
        y = y.reshape(batch_size, 1)

        loss = F.sum(F.huber_loss(q_subset, y, 1.0))

        self._model.cleargrads()
        loss.backward()
        self._optimizer.update()

        self._loss_val = np.asscalar(cuda.to_cpu(loss.data))

        # Keeps track of the number of train() calls
        self._steps += 1
        if self._steps % self._target_update_interval == 0:
            # copy weights
            self._target.copyparams(self._model)
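
A minimal sketch (not part of the repository code above) of the F.select_item + F.huber_loss pattern used in train(); the shapes and values below are invented for illustration.

import numpy as np
import chainer.functions as F

q = np.array([[1.0, 2.0, 3.0],
              [0.5, 0.0, -1.0]], dtype=np.float32)    # Q-values, shape (batch, n_actions)
actions = np.array([2, 0], dtype=np.int32)            # action taken in each sample
y = np.array([[2.5], [1.0]], dtype=np.float32)        # TD targets, shape (batch, 1)

# Pick the Q-value of the taken action and compare it to the target.
q_subset = F.reshape(F.select_item(q, actions), (len(actions), 1))
loss = F.sum(F.huber_loss(q_subset, y, 1.0))          # scalar Variable
print(loss.array)  # 0.5 * 0.5**2 + 0.5 * 0.5**2 = 0.25
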
    def check_forward(self, x_data, t_data):
        x = chainer.Variable(x_data)
        t = chainer.Variable(t_data)
        loss = functions.huber_loss(x, t, delta=1)
        self.assertEqual(loss.data.dtype, numpy.float32)
        loss_value = cuda.to_cpu(loss.data)

        diff_data = cuda.to_cpu(x_data) - cuda.to_cpu(t_data)
        expected_result = numpy.zeros(self.shape)
        mask = numpy.abs(diff_data) < 1
        expected_result[mask] = 0.5 * diff_data[mask] ** 2
        expected_result[~mask] = numpy.abs(diff_data[~mask]) - 0.5
        loss_expect = numpy.sum(expected_result, axis=1)
        testing.assert_allclose(loss_value, loss_expect)
    def check_forward(self, x_data, t_data):
        x = chainer.Variable(x_data)
        t = chainer.Variable(t_data)
        loss = functions.huber_loss(x, t, delta=1, reduce=self.reduce)
        self.assertEqual(loss.data.dtype, numpy.float32)
        loss_value = cuda.to_cpu(loss.data)

        diff_data = cuda.to_cpu(x_data) - cuda.to_cpu(t_data)
        loss_expect = numpy.zeros(self.shape)
        mask = numpy.abs(diff_data) < 1
        loss_expect[mask] = 0.5 * diff_data[mask] ** 2
        loss_expect[~mask] = numpy.abs(diff_data[~mask]) - 0.5
        if self.reduce == 'sum_along_second_axis':
            loss_expect = numpy.sum(loss_expect, axis=1)
        testing.assert_allclose(loss_value, loss_expect)
 def _calc_rpn_loss_bbox(self, rpn_bbox_pred, bbox_reg_targets, inds_inside):
     # rpn_bbox_pred has the shape of (1, 4 x n_anchors, feat_h, feat_w)
     n_anchors = self.proposal_layer._num_anchors
     # Reshape it into (4, A, K)
     rpn_bbox_pred = rpn_bbox_pred.reshape(4, n_anchors, -1)
     # Transpose it into (K, A, 4)
     rpn_bbox_pred = rpn_bbox_pred.transpose(2, 1, 0)
     # Reshape it into (K x A, 4)
     rpn_bbox_pred = rpn_bbox_pred.reshape(-1, 4)
     # Keep the number of bbox
     n_bbox = rpn_bbox_pred.shape[0]
     # Select bbox and ravel it
     rpn_bbox_pred = F.flatten(rpn_bbox_pred[inds_inside])
     # Create batch dimension
     rpn_bbox_pred = F.expand_dims(rpn_bbox_pred, 0)
     # Ravel the targets and create batch dimension
     bbox_reg_targets = bbox_reg_targets.ravel()[None, :]
     # Calc Smooth L1 Loss (When delta=1, huber loss is SmoothL1Loss)
     rpn_loss_bbox = F.huber_loss(rpn_bbox_pred, bbox_reg_targets,
                                  self._delta)
     rpn_loss_bbox /= n_bbox
     return rpn_loss_bbox.reshape(())
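
For reference (an illustrative check, not part of the repository above): with delta=1, F.huber_loss is the Smooth L1 loss commonly used for box regression, l(d) = 0.5 * d**2 if |d| < 1 and |d| - 0.5 otherwise. A quick NumPy comparison:

import numpy as np
import chainer.functions as F

def smooth_l1_reference(x, t):
    d = x - t
    return np.where(np.abs(d) < 1, 0.5 * d ** 2, np.abs(d) - 0.5)

x = np.random.randn(1, 8).astype(np.float32)
t = np.random.randn(1, 8).astype(np.float32)
chainer_loss = F.huber_loss(x, t, delta=1, reduce='no').array
np.testing.assert_allclose(chainer_loss, smooth_l1_reference(x, t),
                           rtol=1e-5, atol=1e-6)
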
def multibox_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k, comm=None):
    """Computes multibox losses.

    This is a loss function used in [#]_.
    This function returns :obj:`loc_loss` and :obj:`conf_loss`.
    :obj:`loc_loss` is a loss for localization and
    :obj:`conf_loss` is a loss for classification.
    The formulas of these losses can be found in
    the equation (2) and (3) in the original paper.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan,
       Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        mb_locs (chainer.Variable or array): The offsets and scales
            for predicted bounding boxes.
            Its shape is :math:`(B, K, 4)`,
            where :math:`B` is the number of samples in the batch and
            :math:`K` is the number of default bounding boxes.
        mb_confs (chainer.Variable or array): The classes of predicted
            bounding boxes.
            Its shape is :math:`(B, K, n\_class)`.
            This function assumes the first class is background (negative).
        gt_mb_locs (chainer.Variable or array): The offsets and scales
            for ground truth bounding boxes.
            Its shape is :math:`(B, K, 4)`.
        gt_mb_labels (chainer.Variable or array): The classes of ground truth
            bounding boxes.
            Its shape is :math:`(B, K)`.
        k (float): A coefficient which is used for hard negative mining.
            This value determines the ratio between the number of positives
            and that of mined negatives. The value used in the original paper
            is :obj:`3`.
        comm (~chainermn.communicators.CommunicatorBase):
            A ChainerMN communicator.
            If it is specified, the number of positive examples is computed
            among all GPUs.

    Returns:
        tuple of chainer.Variable:
        This function returns two :obj:`chainer.Variable`: :obj:`loc_loss` and
        :obj:`conf_loss`.
    """
    mb_locs = chainer.as_variable(mb_locs)
    mb_confs = chainer.as_variable(mb_confs)
    gt_mb_locs = chainer.as_variable(gt_mb_locs)
    gt_mb_labels = chainer.as_variable(gt_mb_labels)

    xp = chainer.backends.cuda.get_array_module(gt_mb_labels.array)
    with chainer.backends.cuda.get_device_from_array(gt_mb_labels.array):
        positive = gt_mb_labels.array > 0
        n_positive = positive.sum()

        if comm:
            n_positive = comm.allreduce_obj(n_positive) / comm.size

        if n_positive == 0:
            z = chainer.Variable(xp.zeros((), dtype=np.float32))
            return z, z

        loc_loss = F.huber_loss(mb_locs, gt_mb_locs, 1, reduce='no')
        loc_loss = F.sum(loc_loss, axis=-1)
        loc_loss *= positive.astype(loc_loss.dtype)
        loc_loss = F.sum(loc_loss) / n_positive

        conf_loss = _elementwise_softmax_cross_entropy(mb_confs, gt_mb_labels)
        hard_negative = _hard_negative(conf_loss.array, positive, k)
        conf_loss *= xp.logical_or(positive,
                                   hard_negative).astype(conf_loss.dtype)
        conf_loss = F.sum(conf_loss) / n_positive

    return loc_loss, conf_loss
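
A hedged usage sketch of the function above with random inputs; the shapes follow the docstring. Note that the snippet also depends on the _elementwise_softmax_cross_entropy and _hard_negative helpers from the same module (not shown here); the complete function ships with ChainerCV as chainercv.links.model.ssd.multibox_loss.

import numpy as np

B, K, n_class = 2, 5, 3                      # batch size, default boxes, classes (0 = background)
mb_locs = np.random.randn(B, K, 4).astype(np.float32)
mb_confs = np.random.randn(B, K, n_class).astype(np.float32)
gt_mb_locs = np.random.randn(B, K, 4).astype(np.float32)
gt_mb_labels = np.random.randint(0, n_class, size=(B, K)).astype(np.int32)

loc_loss, conf_loss = multibox_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k=3)
print(loc_loss.array, conf_loss.array)       # two scalar chainer.Variable values
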
    def _check_forward(self, mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
        if self.variable:
            mb_locs = chainer.Variable(mb_locs)
            mb_confs = chainer.Variable(mb_confs)
            gt_mb_locs = chainer.Variable(gt_mb_locs)
            gt_mb_labels = chainer.Variable(gt_mb_labels)

        loc_loss, conf_loss = multibox_loss(mb_locs, mb_confs, gt_mb_locs,
                                            gt_mb_labels, k)

        self.assertIsInstance(loc_loss, chainer.Variable)
        self.assertEqual(loc_loss.shape, ())
        self.assertEqual(loc_loss.dtype, mb_locs.dtype)

        self.assertIsInstance(conf_loss, chainer.Variable)
        self.assertEqual(conf_loss.shape, ())
        self.assertEqual(conf_loss.dtype, mb_confs.dtype)

        if self.variable:
            mb_locs = mb_locs.array
            mb_confs = mb_confs.array
            gt_mb_locs = gt_mb_locs.array
            gt_mb_labels = gt_mb_labels.array

        mb_locs = cuda.to_cpu(mb_locs)
        mb_confs = cuda.to_cpu(mb_confs)
        gt_mb_locs = cuda.to_cpu(gt_mb_locs)
        gt_mb_labels = cuda.to_cpu(gt_mb_labels)
        loc_loss = cuda.to_cpu(loc_loss.array)
        conf_loss = cuda.to_cpu(conf_loss.array)

        n_positive_total = 0
        expect_loc_loss = 0
        expect_conf_loss = 0
        for i in six.moves.xrange(gt_mb_labels.shape[0]):
            n_positive = 0
            negatives = []
            for j in six.moves.xrange(gt_mb_labels.shape[1]):
                loc = F.huber_loss(mb_locs[np.newaxis, i, j],
                                   gt_mb_locs[np.newaxis, i, j], 1).array
                conf = F.softmax_cross_entropy(mb_confs[np.newaxis, i, j],
                                               gt_mb_labels[np.newaxis, i,
                                                            j]).array

                if gt_mb_labels[i, j] > 0:
                    n_positive += 1
                    expect_loc_loss += loc
                    expect_conf_loss += conf
                else:
                    negatives.append(conf)

            n_positive_total += n_positive
            if n_positive > 0:
                expect_conf_loss += sum(sorted(negatives)[-n_positive * k:])

        if n_positive_total == 0:
            expect_loc_loss = 0
            expect_conf_loss = 0
        else:
            expect_loc_loss /= n_positive_total
            expect_conf_loss /= n_positive_total

        np.testing.assert_almost_equal(loc_loss, expect_loc_loss, decimal=2)
        np.testing.assert_almost_equal(conf_loss, expect_conf_loss, decimal=2)
Example #7
    def __call__(self, left, right, disp_true):

        refimg_fea = self.feature_extraction(left)
        targetimg_fea = self.feature_extraction(right)
        # matching
        # with chainer.no_backprop_mode():
        cost = None

        for i in range(int(self.maxdisp / 4)):
            if i > 0:
                # limit size i
                cost_i = F.concat(
                    (refimg_fea[:, :, :, i:], targetimg_fea[:, :, :, :-i]),
                    axis=1).reshape(refimg_fea.shape[0],
                                    refimg_fea.shape[1] * 2, 1,
                                    refimg_fea.shape[2],
                                    refimg_fea.shape[3] - i)
                cost_zero = Variable(
                    cuda.cupy.zeros(
                        (refimg_fea.shape[0], int(refimg_fea.shape[1] * 2), 1,
                         refimg_fea.shape[2], i),
                        dtype=cuda.cupy.float32))
                cost_i = F.concat((cost_zero, cost_i), axis=4)
                cost = F.concat((cost, cost_i), axis=2)
            else:
                cost = F.concat(
                    (refimg_fea, targetimg_fea),
                    axis=1).reshape(refimg_fea.shape[0],
                                    refimg_fea.shape[1] * 2, 1,
                                    refimg_fea.shape[2], refimg_fea.shape[3])

        # gpu0 to gpu1
        cost = F.copy(cost, self.gpu1)

        cost0 = self.dres0(cost)
        cost0 = self.dres1(cost0) + cost0
        cost0 = self.dres2(cost0) + cost0
        cost0 = self.dres3(cost0) + cost0
        cost0 = self.dres4(cost0) + cost0
        cost = self.classify(cost0)

        # gpu1 to gpu0
        cost = F.copy(cost, self.gpu0)

        cost = F.unpooling_nd(cost,
                              4,
                              outsize=(self.maxdisp, left.shape[2],
                                       left.shape[3]))
        # average_pooling_nd(ksize=3, stride=1, pad=1) smooths the unpooled
        # volume to approximate trilinear upsampling
        cost = F.average_pooling_nd(cost, 3, 1, 1)
        cost = F.squeeze(cost, 1)
        pred = F.softmax(cost)  # ???
        pred = disparityregression(self.maxdisp)(pred)

        # calculate loss
        pred = F.clip(pred.reshape(pred.shape[0], -1), 0., float(self.maxdisp))
        disp_true = disp_true.reshape(disp_true.shape[0], -1)

        # mask
        if self.train_type == "kitti":
            pred_mask = F.where(disp_true > 0., pred, disp_true)
        elif self.train_type == "sceneflow":
            pred_mask = F.where(disp_true < self.maxdisp, pred, disp_true)
        else:
            pred_mask = pred

        #mask = Variable(disp_true).array < self.maxdisp
        loss = F.huber_loss(pred_mask, disp_true, delta=1)
        loss = F.average(loss / pred_mask.shape[1])

        chainer.reporter.report({'loss': loss}, self)

        if self.training:
            return loss
        else:
            return pred.reshape(1, 1, left.shape[2], right.shape[3])
Example #8
def mean_clipped_loss(y, t):
    return F.mean(F.huber_loss(y, t, delta=1.0, reduce='no'))
Example #9
 def f(x, t):
     return functions.huber_loss(x, t, delta=1, reduce=self.reduce)
Example #10
 def check_invalid_option(self, xp):
     x = xp.asarray(self.x)
     t = xp.asarray(self.t)
     with self.assertRaises(ValueError):
         functions.huber_loss(x, t, 1, 'invalid_option')
def multibox_focal_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
    """Computes multibox losses.
    This is a loss function used in [#]_.
    This function returns :obj:`loc_loss` and :obj:`conf_loss`.
    :obj:`loc_loss` is a loss for localization and
    :obj:`conf_loss` is a loss for classification.
    The formulas of these losses can be found in
    the equation (2) and (3) in the original paper.
    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan,
       Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.
    Args:
        mb_locs (chainer.Variable or array): The offsets and scales
            for predicted bounding boxes.
            Its shape is :math:`(B, K, 4)`,
            where :math:`B` is the number of samples in the batch and
            :math:`K` is the number of default bounding boxes.
        mb_confs (chainer.Variable or array): The classes of predicted
            bounding boxes.
            Its shape is :math:`(B, K, n\_class)`.
            This function assumes the first class is background (negative).
        gt_mb_locs (chainer.Variable or array): The offsets and scales
            for ground truth bounding boxes.
            Its shape is :math:`(B, K, 4)`.
        gt_mb_labels (chainer.Variable or array): The classes of ground truth
            bounding boxes.
            Its shape is :math:`(B, K)`.
        k (float): A coefficient which is used for hard negative mining.
            This value determines the ratio between the number of positives
            and that of mined negatives. The value used in the original paper
            is :obj:`3`.

    Returns:
        tuple of chainer.Variable:
        This function returns two :obj:`chainer.Variable`: :obj:`loc_loss` and
        :obj:`conf_loss`.
    """
    mb_locs = chainer.as_variable(mb_locs)
    mb_confs = chainer.as_variable(mb_confs)
    gt_mb_locs = chainer.as_variable(gt_mb_locs)
    #gt_mb_labels = chainer.as_variable(gt_mb_labels)

    xp = chainer.cuda.get_array_module(gt_mb_locs.array)

    #print(gt_mb_labels.array.device)
    #print('Multibox')
    #print(chainer.cuda.get_device_from_array(gt_mb_labels.array))

    #with gt_mb_labels.array.device:
    #positive = gt_mb_labels.array > 0
    positive = gt_mb_labels > 0
    n_positive = positive.sum()

    if n_positive == 0:
        z = chainer.Variable(xp.zeros((), dtype=np.float32))
        return z, z

    loc_loss = F.huber_loss(mb_locs, gt_mb_locs, 1, reduce='no')
    loc_loss = F.sum(loc_loss, axis=-1)
    loc_loss *= positive.astype(loc_loss.dtype)
    loc_loss = F.sum(loc_loss) / n_positive

    #conf_loss = _elementwise_softmax_cross_entropy(mb_confs, gt_mb_labels)
    #hard_negative = _hard_negative(conf_loss.array, positive, k)
    #conf_loss *= xp.logical_or(positive, hard_negative).astype(conf_loss.dtype)

    alpha = 0.75
    gamma = 2

    t = gt_mb_labels.reshape(gt_mb_labels.shape[0] * gt_mb_labels.shape[1], )
    class_num = mb_confs.shape[2]  # class_num includes back ground class
    t = F.cast(chainer.as_variable(xp.eye(class_num)[t]), loc_loss.dtype)
    t = t.reshape(gt_mb_labels.shape[0], gt_mb_labels.shape[1], class_num)

    p = F.sigmoid(mb_confs)
    #pt = p * t + (1 - p) * (1 - t) # pt = p if t > 0 else 1-p
    #w = alpha * t + (1 - alpha) * (1 - t)  # w = alpha if t > 0 else 1 - alpha
    #w = w * ((1 - pt) ** gamma)

    pt = F.where(t.array > 0, p, 1 - p)
    w = (1 - pt)**gamma
    w = F.where(t.array > 0, alpha * w, (1 - alpha) * w)

    # From Pytorch implemetation binary_cross_entropy_with_logits
    # https://pytorch.org/docs/master/_modules/torch/nn/functional.html#binary_cross_entropy_with_logits
    max_val = F.clip(-mb_confs, x_min=0.0, x_max=10.0e+12)
    focal_loss = mb_confs - mb_confs * t + max_val + F.log(
        F.exp(-max_val) + F.exp(-mb_confs - max_val))
    focal_loss = F.sum(focal_loss * w) / n_positive
    #focal_loss = -F.sum(w * F.log(pt + 1e-12)) / n_positive

    return loc_loss, focal_loss
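
A quick check (illustrative, not part of the snippet above) that the max_val expression is the numerically stable form of binary cross entropy with logits, x - x*t + log(1 + exp(-x)), evaluated so that exp() never overflows; here it is compared with the naive sigmoid-based formula in plain NumPy on moderate logits.

import numpy as np

x = np.array([-3.0, -0.5, 0.0, 2.0, 6.0])   # logits
t = np.array([0.0, 1.0, 1.0, 0.0, 1.0])     # binary targets

max_val = np.clip(-x, 0.0, None)
stable = x - x * t + max_val + np.log(np.exp(-max_val) + np.exp(-x - max_val))

p = 1.0 / (1.0 + np.exp(-x))                # sigmoid
naive = -(t * np.log(p) + (1.0 - t) * np.log(1.0 - p))

np.testing.assert_allclose(stable, naive, rtol=1e-12)
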
Example #12
    def _check_forward(self, mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
        if self.variable:
            mb_locs = chainer.Variable(mb_locs)
            mb_confs = chainer.Variable(mb_confs)
            gt_mb_locs = chainer.Variable(gt_mb_locs)
            gt_mb_labels = chainer.Variable(gt_mb_labels)

        loc_loss, conf_loss = multibox_loss(
            mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k)

        self.assertIsInstance(loc_loss, chainer.Variable)
        self.assertEqual(loc_loss.shape, ())
        self.assertEqual(loc_loss.dtype, mb_locs.dtype)

        self.assertIsInstance(conf_loss, chainer.Variable)
        self.assertEqual(conf_loss.shape, ())
        self.assertEqual(conf_loss.dtype, mb_confs.dtype)

        if self.variable:
            mb_locs = mb_locs.array
            mb_confs = mb_confs.array
            gt_mb_locs = gt_mb_locs.array
            gt_mb_labels = gt_mb_labels.array

        mb_locs = cuda.to_cpu(mb_locs)
        mb_confs = cuda.to_cpu(mb_confs)
        gt_mb_locs = cuda.to_cpu(gt_mb_locs)
        gt_mb_labels = cuda.to_cpu(gt_mb_labels)
        loc_loss = cuda.to_cpu(loc_loss.array)
        conf_loss = cuda.to_cpu(conf_loss.array)

        n_positive_total = 0
        expect_loc_loss = 0
        expect_conf_loss = 0
        for i in six.moves.xrange(gt_mb_labels.shape[0]):
            n_positive = 0
            negatives = []
            for j in six.moves.xrange(gt_mb_labels.shape[1]):
                loc = F.huber_loss(
                    mb_locs[np.newaxis, i, j],
                    gt_mb_locs[np.newaxis, i, j], 1).array
                conf = F.softmax_cross_entropy(
                    mb_confs[np.newaxis, i, j],
                    gt_mb_labels[np.newaxis, i, j]).array

                if gt_mb_labels[i, j] > 0:
                    n_positive += 1
                    expect_loc_loss += loc
                    expect_conf_loss += conf
                else:
                    negatives.append(conf)

            n_positive_total += n_positive
            if n_positive > 0:
                expect_conf_loss += sum(sorted(negatives)[-n_positive * k:])

        if n_positive_total == 0:
            expect_loc_loss = 0
            expect_conf_loss = 0
        else:
            expect_loc_loss /= n_positive_total
            expect_conf_loss /= n_positive_total

        np.testing.assert_almost_equal(
            loc_loss, expect_loc_loss, decimal=2)
        np.testing.assert_almost_equal(
            conf_loss, expect_conf_loss, decimal=2)
Example #13
    def calc_loss(self,
                  image_size,
                  predicted_grids,
                  gt_bbox_points,
                  objectness_scores,
                  normalize=True):
        predicted_bbox_points = self.get_corners(predicted_grids,
                                                 image_size,
                                                 scale_to_image_size=False)

        # 1. transform box coordinates to aabb coordinates for determination of iou
        predicted_bbox_points = predicted_bbox_points[
            0], predicted_bbox_points[4], predicted_bbox_points[
                3], predicted_bbox_points[7]
        predicted_bbox_points = F.stack(predicted_bbox_points, axis=1)

        # 2. find best prediction area for each gt bbox
        gt_bboxes_to_use_for_loss = []
        positive_anchor_indices = self.xp.empty((0, ), dtype=self.xp.int32)
        not_contributing_anchors = self.xp.empty((0, ), dtype=self.xp.int32)
        for index, gt_bbox in enumerate(gt_bbox_points):
            # determine which bboxes are positive boxes as they have high iou with gt and also which bboxes are negative
            # this is also used to train objectness classification
            gt_bbox = self.xp.tile(gt_bbox[None, ...],
                                   (len(predicted_bbox_points), 1))

            ious = bbox_iou(gt_bbox, predicted_bbox_points.data)
            positive_boxes = self.xp.where((ious[0] >= 0.7))
            not_contributing_boxes = self.xp.where(
                self.xp.logical_and(0.3 < ious[0], ious[0] < 0.7))
            if len(positive_boxes[0]) == 0:
                best_iou_index = ious[0, :].argmax()
                positive_anchor_indices = self.xp.concatenate(
                    (positive_anchor_indices, best_iou_index[None, ...]),
                    axis=0)
                gt_bboxes_to_use_for_loss.append(gt_bbox[0])
            else:
                positive_anchor_indices = self.xp.concatenate(
                    (positive_anchor_indices, positive_boxes[0]), axis=0)
                gt_bboxes_to_use_for_loss.extend(
                    gt_bbox[:len(positive_boxes[0])])
            not_contributing_anchors = self.xp.concatenate(
                (not_contributing_anchors, not_contributing_boxes[0]), axis=0)

        if len(gt_bboxes_to_use_for_loss) == 0:
            return Variable(self.xp.array(0, dtype=predicted_grids.dtype))

        gt_bboxes_to_use_for_loss = F.stack(gt_bboxes_to_use_for_loss)

        # filter predicted bboxes and only keep bboxes from those regions that actually contain a bbox
        predicted_bbox_points = F.get_item(predicted_bbox_points,
                                           positive_anchor_indices)

        # 3. calculate L1 loss for bbox regression
        loss = F.huber_loss(predicted_bbox_points, gt_bboxes_to_use_for_loss,
                            1)

        # 4. calculate objectness loss
        objectness_labels = self.xp.zeros(len(objectness_scores),
                                          dtype=self.xp.int32)
        objectness_labels[not_contributing_anchors] = -1
        objectness_labels[positive_anchor_indices] = 1

        objectness_loss = F.softmax_cross_entropy(
            objectness_scores,
            objectness_labels,
            ignore_label=-1,
        )

        return F.mean(loss), objectness_loss
Example #14
def mean_clipped_loss(y, t):
    # Add an axis because F.huber_loss only accepts arrays with ndim >= 2
    y = F.expand_dims(y, axis=-1)
    t = F.expand_dims(t, axis=-1)
    return F.sum(F.huber_loss(y, t, 1.0)) / y.shape[0]
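
A short usage sketch (invented values, not from the repository above): the expand_dims lifts 1-D vectors to shape (N, 1) so that the default reduce='sum_along_second_axis' has an axis to sum over, and for delta=1 with all errors below 1 the result is simply half the mean squared error.

import numpy as np

y = np.array([0.1, 0.4, -0.2], dtype=np.float32)
t = np.array([0.0, 0.5, 0.0], dtype=np.float32)
print(mean_clipped_loss(y, t).array)   # ~0.01
print(np.mean(0.5 * (y - t) ** 2))     # ~0.01 as well, since every |y - t| < 1
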
    def __call__(self, x, img_info, gt_boxes=None):
        """Faster RCNN forward

        Args:
            x (:class:`~chainer.Variable`): The input image. Note that the
                batchsize should be 1. So the shape should be
                :math:`(1, n_channels, height, width)`.
            img_info (:class:`~chainer.Variable`): The input image info. It
                contains :math:`(height, width)` and the batchsize should be 1.
                So the shape should be :math:`(1, 2)`.
            gt_boxes (:class:`~chainer.Variable`): The ground truth bounding
                boxes and its class label array. The shape should be
                :math:`(1, n_gt_boxes, 5)` and the batchsize should be 1.

        """
        if self.type_check_enable:
            self._check_data_type_forward(x, img_info, gt_boxes)

        # Use the array module of the backend of trunk model
        with cuda.get_device_from_array(x.data):
            xp, feature_map = self.trunk.xp, self.trunk(x)

            # RPN training mode
            if self.rpn_train and gt_boxes is not None:
                return self.RPN(feature_map, img_info, gt_boxes)
            else:
                proposals, probs = self.RPN(feature_map, img_info, gt_boxes)
                self.rpn_proposals = proposals
                self.rpn_probs = probs

            # RCNN
            batch_id = xp.zeros((len(proposals), 1), dtype=xp.float32)
            brois = xp.concatenate((batch_id, proposals), axis=1)
            pool5 = F.roi_pooling_2d(feature_map, brois, 7, 7,
                                     self._spatial_scale)
            fc6 = F.dropout(F.relu(self.fc6(pool5)), train=self.rcnn_train)
            fc7 = F.dropout(F.relu(self.fc7(fc6)), train=self.rcnn_train)

            # Per class probability
            cls_score = self.cls_score(fc7)

            # BBox predictions
            bbox_pred = self.bbox_pred(fc7)

            if self.rcnn_train and gt_boxes is not None:
                # Create proposal target layer if not exsist
                if not hasattr(self, 'proposal_target_layer'):
                    self.proposal_target_layer = ProposalTargetLayer(
                        self._feat_stride, self._anchor_ratios,
                        self._anchor_scales, self._num_classes)
                use_gt_boxes, bbox_reg_targets, keep_inds = \
                    self.proposal_target_layer(proposals, gt_boxes)

                # TODO(mitmul): Remove this re-sending below vars to GPU
                xp = self.RPN.xp
                if xp is cuda.cupy:
                    use_gt_boxes = xp.asarray(use_gt_boxes)
                    bbox_reg_targets = xp.asarray(bbox_reg_targets)
                    keep_inds = xp.asarray(keep_inds)

                # Select predicted scores and calc loss
                cls_score = cls_score[keep_inds]
                cls_labels = use_gt_boxes[:, -1].astype(xp.int32)
                loss_cls = F.softmax_cross_entropy(cls_score, cls_labels)
                loss_cls = loss_cls.reshape(())
                cls_acc = F.accuracy(cls_score, cls_labels, -1)

                # Select predicted bbox transformations and calc loss
                bbox_pred = bbox_pred[keep_inds]
                loss_bbox = F.huber_loss(bbox_pred, bbox_reg_targets,
                                         self._rcnn_delta)
                loss_bbox = F.sum(loss_bbox) / loss_bbox.size
                loss_bbox = loss_bbox.reshape(())

                loss_rcnn = loss_cls + loss_bbox

                reporter.report({'loss_cls': loss_cls,
                                 'cls_accuracy': cls_acc,
                                 'loss_bbox': loss_bbox,
                                 'loss_rcnn': loss_rcnn}, self)

                return loss_rcnn

            pred_boxes = bbox_transform_inv(proposals, bbox_pred.data)
            pred_boxes = clip_boxes(pred_boxes, img_info.data[0])

            return F.softmax(cls_score), pred_boxes
def multibox_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k,
                  binarize=False, arm_confs=None, arm_locs=None):
    """Computes multibox losses.

    Different from :obj:`chainercv.MultiboxCoder`, cascaded offset regression,
    negative anchor filtering, and ARM binarization loss are supported.

    This is a loss function used in [#]_.
    This function returns :obj:`loc_loss` and :obj:`conf_loss`.
    :obj:`loc_loss` is a loss for localization and
    :obj:`conf_loss` is a loss for classification.
    The formulas of these losses can be found in
    the equation (2) and (3) in the original paper.

    .. [#] Shifeng Zhang, Longyin Wen, Xiao Bian, Zhen Lei, Stan Z. Li.
       Single-Shot Refinement Neural Network for Object Detection.

    Args:
        mb_locs (chainer.Variable or array): The offsets and scales
            for predicted bounding boxes.
            Its shape is :math:`(B, K, 4)`,
            where :math:`B` is the number of samples in the batch and
            :math:`K` is the number of default bounding boxes.
        mb_confs (chainer.Variable or array): The classes of predicted
            bounding boxes.
            Its shape is :math:`(B, K, n\_class)`.
            This function assumes the first class is background (negative).
        gt_mb_locs (chainer.Variable or array): The offsets and scales
            for ground truth bounding boxes.
            Its shape is :math:`(B, K, 4)`.
        gt_mb_labels (chainer.Variable or array): The classes of ground truth
            bounding boxes.
            Its shape is :math:`(B, K)`.
        k (float): A coefficient which is used for hard negative mining.
            This value determines the ratio between the number of positives
            and that of mined negatives. The value used in the original paper
            is :obj:`3`.
        binarize (bool): If :obj:`True`, the conf loss objective is binarized
            (any class vs. background).
        arm_confs (chainer.Variable or None): If not :obj:`None`, negative
            anchor filtering is enabled. Indices where :obj:`arm_confs` <= 0.01
            will not be used for training.
        arm_locs (chainer.Variable or None): If not :obj:`None`, cascaded
            offset regression is enabled.

    Returns:
        tuple of chainer.Variable:
        This function returns two :obj:`chainer.Variable`: :obj:`loc_loss` and
        :obj:`conf_loss`.
    """
    variance = (0.1, 0.2)

    mb_locs = chainer.as_variable(mb_locs)
    mb_confs = chainer.as_variable(mb_confs)
    gt_mb_locs = chainer.as_variable(gt_mb_locs)
    gt_mb_labels = chainer.as_variable(gt_mb_labels)

    xp = chainer.cuda.get_array_module(gt_mb_labels.array)

    if arm_locs is not None:
        if isinstance(arm_locs, chainer.Variable):
            arm_locs = arm_locs.array.copy()
        else:
            arm_locs = arm_locs.copy()

        w_offset = arm_locs[:, :, 2:] + mb_locs[:, :, 2:]
        x_offset = xp.exp(arm_locs[:, :, 2:] * variance[1]) * mb_locs[:, :, :2]
        x_offset += arm_locs[:, :, :2]
        mb_locs = F.dstack((x_offset, w_offset))

    positive = gt_mb_labels.array > 0
    n_positive = positive.sum()
    if n_positive == 0:
        z = chainer.Variable(xp.zeros((), dtype=np.float32))
        return z, z

    loc_loss = F.huber_loss(mb_locs, gt_mb_locs, 1, reduce='no')
    if arm_confs is not None:
        if isinstance(arm_confs, chainer.Variable):
            arm_confs = arm_confs.array.copy()
        else:
            arm_confs = arm_confs.copy()

        objectness = xp.exp(arm_confs)
        negativeness = xp.exp(1 - arm_confs)
        objectness /= objectness + negativeness
        objectness[objectness <= 0.01] = 0
        objectness[objectness > 0.01] = 1
        objectness = objectness.reshape(objectness.shape[0],
                                        objectness.shape[1])
        n_positive = (positive * objectness).sum()
    else:
        objectness = None

    loc_loss = F.sum(loc_loss, axis=-1)
    loc_loss *= positive.astype(loc_loss.dtype)
    if objectness is not None:
        loc_loss *= objectness.astype(loc_loss.dtype)
    loc_loss = F.sum(loc_loss) / n_positive

    conf_loss = _elementwise_softmax_cross_entropy(mb_confs, gt_mb_labels,
                                                   binarize)

    hard_negative = _hard_negative(conf_loss.array, positive, k, objectness)
    if arm_confs is not None:
        positive *= objectness.astype(positive.dtype)
    conf_loss *= xp.logical_or(positive, hard_negative).astype(conf_loss.dtype)
    conf_loss = F.sum(conf_loss) / n_positive

    return loc_loss, conf_loss
Example #17
def smooth_l1(x, t, beta):
    return F.huber_loss(x, t, beta, reduce='no') / beta
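
A small check (illustrative, not from the repository above): dividing the Huber loss computed with delta=beta by beta gives the smooth-L1-with-beta form, 0.5 * d**2 / beta for |d| < beta and |d| - 0.5 * beta otherwise, which is the variant used by several detection codebases.

import numpy as np
import chainer.functions as F

def smooth_l1_beta_reference(x, t, beta):
    d = x - t
    return np.where(np.abs(d) < beta, 0.5 * d ** 2 / beta, np.abs(d) - 0.5 * beta)

x = np.random.randn(1, 6).astype(np.float32)
t = np.random.randn(1, 6).astype(np.float32)
beta = 0.11
np.testing.assert_allclose(smooth_l1(x, t, beta).array,
                           smooth_l1_beta_reference(x, t, beta),
                           rtol=1e-4, atol=1e-6)
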
Example #18
	def compute_loss(self, s, a, r, new_s, done, loss_log=False):
		if self.net_type == "full":
			s = s.reshape(self.batch_size, self.input_slides*self.size*self.size)
			new_s = new_s.reshape(self.batch_size, self.input_slides*self.size*self.size)

		#gpu
		if self.gpu >= 0:
			s = cuda.to_gpu(s)
			new_s = cuda.to_gpu(new_s)
		if chainer.__version__ >= "2.0.0":
			s = Variable(s)
			new_s = Variable(new_s)
		else:
			s = Variable(s, volatile='auto')
			new_s = Variable(new_s, volatile='auto')
		q_value = self.q(s)

		with chainer.no_backprop_mode():
			if self.mode == "regularize":
				tg_q_value = self.q(new_s)
			elif self.mode == "target_mix":
				tg_q_value = (1.0-self.mix_rate) * self.q(new_s) + self.mix_rate * self.fixed_q(new_s)
			elif self.mode == "default":
				tg_q_value = self.fixed_q(new_s)
		#print "tg_q_value[0]", tg_q_value[0].data

		if self.gpu >= 0:
			a = cuda.to_gpu(a)
			r = cuda.to_gpu(r)
			done = cuda.to_gpu(done)

		if chainer.__version__ >= "2.0.0":
			a = Variable(a)
		else:
			a = Variable(a, volatile='auto')

		argmax_a = F.argmax(tg_q_value, axis=1)

		#print a
		#print r
		q_action_value = F.select_item(q_value, a)
		#print "q_action_value", q_action_value.data
		target = r + self.discount * (1.0 - done) * F.select_item(tg_q_value, argmax_a)
		#print "target", target.data
		#target is float32

		q_action_value = F.reshape(q_action_value, (-1, 1))
		target = F.reshape(target, (-1, 1))

		loss_sum = F.sum(F.huber_loss(q_action_value, target, delta=1.0))
		loss = loss_sum / q_action_value.shape[0]
		#print "loss_a", loss.data

		if self.mode == "regularize" or loss_log == True:
			if self.penalty_function == "value":
				y = q_value
				with chainer.no_backprop_mode():
					t = self.fixed_q(s)
			if self.penalty_function == "action_value":
				y = q_action_value
				with chainer.no_backprop_mode():
					t = F.select_item(self.fixed_q(s), a)
					t = F.reshape(t, (-1, 1))
			if self.penalty_function == "max_action_value":
				y = F.select_item(self.q(new_s), argmax_a)
				y = F.reshape(y, (-1, 1))
				with chainer.no_backprop_mode():
					t = F.select_item(self.fixed_q(new_s), argmax_a)
					t = F.reshape(t, (-1, 1))

			if self.penalty_type == "huber":
				if self.final_penalty_cut == 1:
					penalty_sum = F.sum((1.0 - done)*F.huber_loss(y, t, delta=1.0))
				else:
					penalty_sum = F.sum(F.huber_loss(y, t, delta=1.0))
				penalty = penalty_sum / (y.shape[0]*y.shape[1])
			if self.penalty_type == "mean_squared":
				penalty = F.mean_squared_error(y, t)

			if loss_log == True:
				#y_data = cuda.to_cpu(y.data)
				#t_data = cuda.to_cpu(t.data)
				return loss, penalty
				#return loss, penalty, np.average(y_data), np.std(y_data), np.average(t_data), np.std(t_data)

			if penalty.data > self.threshold:
				#print "-------------on----------------"
				loss = loss + self.penalty_weight * penalty
		#print "loss_b", loss.data
		return loss
Example #19
def mean_clipped_loss(y, t):
    return F.mean(F.huber_loss(y, t, delta=1.0, reduce='no'))
Example #20
 def check_invalid_option(self, xp):
     x = xp.asarray(self.x)
     t = xp.asarray(self.t)
     with self.assertRaises(ValueError):
         functions.huber_loss(x, t, 1, 'invalid_option')
 def loss(self, pred_y, true_y):
     loss = F.huber_loss(pred_y, true_y.astype('f'), 1.0, reduce='no')
     return F.mean(loss)