def train(self, x, y, actions=None):
    """Run one optimisation step and periodically refresh the target network.

    The Q-value of the action taken for each sample is selected from the
    model output and regressed towards ``y`` with a summed Huber loss
    (delta 1.0).

    Args:
        x: Batch of inputs passed to ``self._model``.
        y: Regression targets; reshaped to ``(batch_size, 1)``.
        actions: Array with the action index taken for each sample. It is
            required; the ``None`` default is kept only so the signature
            stays unchanged.

    Raises:
        ValueError: If ``actions`` is ``None``.
    """
    if actions is None:
        # Previously this failed later with an opaque AttributeError.
        raise ValueError('actions must be provided to train()')

    actions = actions.astype(np.int32)
    batch_size = len(actions)

    # NOTE(review): this is a truthiness test, so a device id of 0 would be
    # treated as "no GPU" -- confirm how _gpu_device is encoded.
    if self._gpu_device:
        x = cuda.to_gpu(x, self._gpu_device)
        y = cuda.to_gpu(y, self._gpu_device)
        actions = cuda.to_gpu(actions, self._gpu_device)

    q = self._model(x)
    q_subset = F.reshape(F.select_item(q, actions), (batch_size, 1))
    y = y.reshape(batch_size, 1)
    loss = F.sum(F.huber_loss(q_subset, y, 1.0))

    self._model.cleargrads()
    loss.backward()
    self._optimizer.update()

    # ndarray.item() is the replacement for np.asscalar, which has been
    # removed from NumPy.
    self._loss_val = cuda.to_cpu(loss.data).item()

    # Keeps track of the number of train() calls
    self._steps += 1
    if self._steps % self._target_update_interval == 0:
        # copy weights
        self._target.copyparams(self._model)
def check_forward(self, x_data, t_data):
    """Check ``huber_loss`` (delta=1) against a NumPy reference.

    The reference is the element-wise Huber value of ``x_data - t_data``
    summed along axis 1.
    """
    loss = functions.huber_loss(
        chainer.Variable(x_data), chainer.Variable(t_data), delta=1)
    self.assertEqual(loss.data.dtype, numpy.float32)
    actual = cuda.to_cpu(loss.data)

    residual = cuda.to_cpu(x_data) - cuda.to_cpu(t_data)
    quadratic = numpy.abs(residual) < 1
    linear = ~quadratic

    # Quadratic branch where |residual| < 1, linear branch elsewhere.
    elementwise = numpy.zeros(self.shape)
    elementwise[quadratic] = 0.5 * residual[quadratic] ** 2
    elementwise[linear] = numpy.abs(residual[linear]) - 0.5

    testing.assert_allclose(actual, numpy.sum(elementwise, axis=1))
def check_forward(self, x_data, t_data):
    """Check ``huber_loss`` (delta=1, ``reduce=self.reduce``) against NumPy.

    The reference is the element-wise Huber value of ``x_data - t_data``;
    it is summed along axis 1 only when ``self.reduce`` is
    ``'sum_along_second_axis'``.
    """
    loss = functions.huber_loss(
        chainer.Variable(x_data), chainer.Variable(t_data),
        delta=1, reduce=self.reduce)
    self.assertEqual(loss.data.dtype, numpy.float32)
    actual = cuda.to_cpu(loss.data)

    residual = cuda.to_cpu(x_data) - cuda.to_cpu(t_data)
    quadratic = numpy.abs(residual) < 1
    linear = ~quadratic

    # Quadratic branch where |residual| < 1, linear branch elsewhere.
    reference = numpy.zeros(self.shape)
    reference[quadratic] = 0.5 * residual[quadratic] ** 2
    reference[linear] = numpy.abs(residual[linear]) - 0.5

    if self.reduce == 'sum_along_second_axis':
        reference = numpy.sum(reference, axis=1)

    testing.assert_allclose(actual, reference)
def _calc_rpn_loss_bbox(self, rpn_bbox_pred, bbox_reg_targets, inds_inside):
    """Huber (Smooth L1 when delta=1) loss of the RPN box regression.

    Args:
        rpn_bbox_pred: Predicted box deltas, shaped
            ``(1, 4 x n_anchors, feat_h, feat_w)``.
        bbox_reg_targets: Regression targets for the selected anchors.
        inds_inside: Index of the anchors that take part in the loss.

    Returns:
        A 0-dimensional loss, divided by the total number of boxes
        (before selection).
    """
    num_anchors = self.proposal_layer._num_anchors

    # (1, 4 x A, H, W) -> (4, A, K) -> (K, A, 4) -> (K x A, 4)
    boxes = (rpn_bbox_pred
             .reshape(4, num_anchors, -1)
             .transpose(2, 1, 0)
             .reshape(-1, 4))
    total_boxes = boxes.shape[0]

    # Pick the requested boxes, ravel them and add a batch axis.
    selected = F.expand_dims(F.flatten(boxes[inds_inside]), 0)
    # Same layout for the targets.
    targets = bbox_reg_targets.ravel()[None, :]

    smooth_l1 = F.huber_loss(selected, targets, self._delta)
    return (smooth_l1 / total_boxes).reshape(())
def multibox_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k, comm=None):
    r"""Computes multibox losses.

    This is a loss function used in [#]_.
    This function returns :obj:`loc_loss` and :obj:`conf_loss`.
    :obj:`loc_loss` is a loss for localization and
    :obj:`conf_loss` is a loss for classification.
    The formulas of these losses can be found in
    the equation (2) and (3) in the original paper.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan,
       Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        mb_locs (chainer.Variable or array): The offsets and scales
            for predicted bounding boxes.
            Its shape is :math:`(B, K, 4)`,
            where :math:`B` is the number of samples in the batch and
            :math:`K` is the number of default bounding boxes.
        mb_confs (chainer.Variable or array): The classes of predicted
            bounding boxes.
            Its shape is :math:`(B, K, n\_class)`.
            This function assumes the first class is background (negative).
        gt_mb_locs (chainer.Variable or array): The offsets and scales
            for ground truth bounding boxes.
            Its shape is :math:`(B, K, 4)`.
        gt_mb_labels (chainer.Variable or array): The classes of ground truth
            bounding boxes.
            Its shape is :math:`(B, K)`.
        k (float): A coefficient which is used for hard negative mining.
            This value determines the ratio between the number of positives
            and that of mined negatives. The value used in the original paper
            is :obj:`3`.
        comm (~chainermn.communicators.CommunicatorBase):
            A ChainerMN communicator.
            If it is specified, the number of positive examples is computed
            among all GPUs.

    Returns:
        tuple of chainer.Variable:
        This function returns two :obj:`chainer.Variable`: :obj:`loc_loss` and
        :obj:`conf_loss`.
    """
    # The docstring is a raw string because it contains ``\_``, which is
    # not a valid escape sequence in a regular string literal.
    mb_locs = chainer.as_variable(mb_locs)
    mb_confs = chainer.as_variable(mb_confs)
    gt_mb_locs = chainer.as_variable(gt_mb_locs)
    gt_mb_labels = chainer.as_variable(gt_mb_labels)

    xp = chainer.backends.cuda.get_array_module(gt_mb_labels.array)
    with chainer.backends.cuda.get_device_from_array(gt_mb_labels.array):
        # Label 0 is background, so anything above it is a positive box.
        positive = gt_mb_labels.array > 0
        n_positive = positive.sum()

        if comm:
            # Average the positive count over all workers.
            n_positive = comm.allreduce_obj(n_positive) / comm.size

        if n_positive == 0:
            # Nothing to normalise by: both losses are defined as zero.
            z = chainer.Variable(xp.zeros((), dtype=np.float32))
            return z, z

        loc_loss = F.huber_loss(mb_locs, gt_mb_locs, 1, reduce='no')
        loc_loss = F.sum(loc_loss, axis=-1)
        loc_loss *= positive.astype(loc_loss.dtype)
        loc_loss = F.sum(loc_loss) / n_positive

        conf_loss = _elementwise_softmax_cross_entropy(mb_confs, gt_mb_labels)
        hard_negative = _hard_negative(conf_loss.array, positive, k)
        # Only positives and mined hard negatives contribute.
        conf_loss *= xp.logical_or(
            positive, hard_negative).astype(conf_loss.dtype)
        conf_loss = F.sum(conf_loss) / n_positive

        return loc_loss, conf_loss
def _check_forward(self, mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
    """Compare ``multibox_loss`` with a per-box reference computation.

    Inputs are wrapped in ``chainer.Variable`` when ``self.variable`` is
    set. Both returned losses must be 0-dimensional Variables with the
    dtype of the matching prediction, and must agree with the reference to
    two decimals.
    """
    inputs = (mb_locs, mb_confs, gt_mb_locs, gt_mb_labels)
    if self.variable:
        inputs = tuple(chainer.Variable(array) for array in inputs)
    mb_locs, mb_confs, gt_mb_locs, gt_mb_labels = inputs

    loc_loss, conf_loss = multibox_loss(
        mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k)

    for loss, prediction in ((loc_loss, mb_locs), (conf_loss, mb_confs)):
        self.assertIsInstance(loss, chainer.Variable)
        self.assertEqual(loss.shape, ())
        self.assertEqual(loss.dtype, prediction.dtype)

    # Bring everything back to host arrays for the reference computation.
    if self.variable:
        inputs = tuple(variable.array for variable in inputs)
    mb_locs, mb_confs, gt_mb_locs, gt_mb_labels = (
        cuda.to_cpu(array) for array in inputs)
    loc_loss = cuda.to_cpu(loc_loss.array)
    conf_loss = cuda.to_cpu(conf_loss.array)

    n_positive_total = 0
    expect_loc_loss = 0
    expect_conf_loss = 0
    for i in six.moves.xrange(gt_mb_labels.shape[0]):
        n_positive = 0
        negatives = []
        for j in six.moves.xrange(gt_mb_labels.shape[1]):
            loc = F.huber_loss(
                mb_locs[np.newaxis, i, j],
                gt_mb_locs[np.newaxis, i, j], 1).array
            conf = F.softmax_cross_entropy(
                mb_confs[np.newaxis, i, j],
                gt_mb_labels[np.newaxis, i, j]).array

            if gt_mb_labels[i, j] > 0:
                n_positive += 1
                expect_loc_loss += loc
                expect_conf_loss += conf
            else:
                negatives.append(conf)

        n_positive_total += n_positive
        if n_positive > 0:
            # Hard negative mining: keep the n_positive * k largest losses.
            expect_conf_loss += sum(sorted(negatives)[-n_positive * k:])

    if n_positive_total:
        expect_loc_loss /= n_positive_total
        expect_conf_loss /= n_positive_total
    else:
        expect_loc_loss = 0
        expect_conf_loss = 0

    np.testing.assert_almost_equal(loc_loss, expect_loc_loss, decimal=2)
    np.testing.assert_almost_equal(conf_loss, expect_conf_loss, decimal=2)
def __call__(self, left, right, disp_true):
    """Predict disparity for a stereo pair and report the Huber loss.

    Args:
        left: Left (reference) image batch.
        right: Right (target) image batch.
        disp_true: Ground-truth disparity; flattened per sample before the
            loss is computed.

    Returns:
        The loss when ``self.training`` is true, otherwise the prediction
        reshaped to ``(1, 1, left.shape[2], right.shape[3])``.
    """
    refimg_fea = self.feature_extraction(left)
    targetimg_fea = self.feature_extraction(right)

    batch = refimg_fea.shape[0]
    channels = refimg_fea.shape[1]
    height = refimg_fea.shape[2]
    width = refimg_fea.shape[3]

    # matching
    # Build one cost slice per candidate disparity and concatenate them all
    # at once, instead of re-concatenating the growing volume each step.
    cost_slices = []
    for i in range(int(self.maxdisp / 4)):
        if i > 0:
            # Shift the target features by i and pair them with the
            # overlapping part of the reference features.
            cost_i = F.concat(
                (refimg_fea[:, :, :, i:], targetimg_fea[:, :, :, :-i]),
                axis=1).reshape(batch, channels * 2, 1, height, width - i)
            # Zero-pad the i columns that have no counterpart.
            cost_zero = Variable(
                cuda.cupy.zeros(
                    (batch, int(channels * 2), 1, height, i),
                    dtype=cuda.cupy.float32))
            cost_i = F.concat((cost_zero, cost_i), axis=4)
        else:
            cost_i = F.concat(
                (refimg_fea, targetimg_fea),
                axis=1).reshape(batch, channels * 2, 1, height, width)
        cost_slices.append(cost_i)
    cost = F.concat(cost_slices, axis=2)

    # gpu0 to gpu1
    cost = F.copy(cost, self.gpu1)

    cost0 = self.dres0(cost)
    cost0 = self.dres1(cost0) + cost0
    cost0 = self.dres2(cost0) + cost0
    cost0 = self.dres3(cost0) + cost0
    cost0 = self.dres4(cost0) + cost0
    cost = self.classify(cost0)

    # gpu1 to gpu0
    cost = F.copy(cost, self.gpu0)

    cost = F.unpooling_nd(
        cost, 4, outsize=(self.maxdisp, left.shape[2], left.shape[3]))
    # average_pooling_nd(kernel=3, stride=1) is inserted here to stand in
    # for trilinear upsampling.
    cost = F.average_pooling_nd(cost, 3, 1, 1)
    cost = F.squeeze(cost, 1)

    pred = F.softmax(cost)
    pred = disparityregression(self.maxdisp)(pred)

    # calculate loss
    pred = F.clip(pred.reshape(pred.shape[0], -1), 0., float(self.maxdisp))
    disp_true = disp_true.reshape(disp_true.shape[0], -1)

    # mask: where the ground truth is invalid, use the ground truth itself
    # as the prediction so those pixels contribute zero loss.
    if self.train_type == "kitti":
        pred_mask = F.where(disp_true > 0., pred, disp_true)
    elif self.train_type == "sceneflow":
        # Was the bare name ``maxdisp``; every other use in this method
        # reads the instance attribute.
        pred_mask = F.where(disp_true < self.maxdisp, pred, disp_true)
    else:
        pred_mask = pred

    loss = F.huber_loss(pred_mask, disp_true, delta=1)
    loss = F.average(loss / pred_mask.shape[1])

    chainer.reporter.report({'loss': loss}, self)

    if self.training:
        return loss
    else:
        return pred.reshape(1, 1, left.shape[2], right.shape[3])
def mean_clipped_loss(y, t):
    """Return the mean of the element-wise Huber loss (delta 1.0) of y vs t."""
    elementwise = F.huber_loss(y, t, delta=1.0, reduce='no')
    return F.mean(elementwise)
def f(x, t):
    """Apply ``huber_loss`` with delta=1 and ``reduce=self.reduce``.

    ``self`` is not a parameter; it is resolved from the surrounding scope.
    """
    result = functions.huber_loss(x, t, delta=1, reduce=self.reduce)
    return result
def check_invalid_option(self, xp):
    """Assert ``huber_loss`` raises ValueError for ``'invalid_option'``.

    Args:
        xp: Array module used to convert ``self.x`` and ``self.t``.
    """
    inputs = xp.asarray(self.x)
    targets = xp.asarray(self.t)

    with self.assertRaises(ValueError):
        functions.huber_loss(inputs, targets, 1, 'invalid_option')
def multibox_focal_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
    r"""Computes the multibox localization loss and a focal confidence loss.

    The localization loss follows the multibox loss of [#]_: a Huber loss
    (delta 1) over positive boxes, normalised by the number of positives.
    The confidence loss replaces softmax cross entropy with hard negative
    mining by a sigmoid binary cross entropy against one-hot labels,
    weighted per element with ``alpha = 0.75`` and ``gamma = 2`` and
    normalised by the number of positives.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan,
       Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        mb_locs (chainer.Variable or array): The offsets and scales
            for predicted bounding boxes.
            Its shape is :math:`(B, K, 4)`,
            where :math:`B` is the number of samples in the batch and
            :math:`K` is the number of default bounding boxes.
        mb_confs (chainer.Variable or array): The classes of predicted
            bounding boxes.
            Its shape is :math:`(B, K, n\_class)`.
            This function assumes the first class is background (negative).
        gt_mb_locs (chainer.Variable or array): The offsets and scales
            for ground truth bounding boxes.
            Its shape is :math:`(B, K, 4)`.
        gt_mb_labels (chainer.Variable or array): The classes of ground truth
            bounding boxes.
            Its shape is :math:`(B, K)`.
        k (float): Unused. Kept so the signature matches the
            hard-negative-mining multibox loss.

    Returns:
        tuple of chainer.Variable:
        This function returns two :obj:`chainer.Variable`: :obj:`loc_loss` and
        :obj:`focal_loss`.
    """
    mb_locs = chainer.as_variable(mb_locs)
    mb_confs = chainer.as_variable(mb_confs)
    gt_mb_locs = chainer.as_variable(gt_mb_locs)
    # The label arithmetic below needs a raw array; unwrap a Variable so the
    # documented input types are actually accepted.
    if isinstance(gt_mb_labels, chainer.Variable):
        gt_mb_labels = gt_mb_labels.array

    xp = chainer.cuda.get_array_module(gt_mb_locs.array)

    # Label 0 is background, so anything above it is a positive box.
    positive = gt_mb_labels > 0
    n_positive = positive.sum()

    if n_positive == 0:
        # Nothing to normalise by: both losses are defined as zero.
        z = chainer.Variable(xp.zeros((), dtype=np.float32))
        return z, z

    loc_loss = F.huber_loss(mb_locs, gt_mb_locs, 1, reduce='no')
    loc_loss = F.sum(loc_loss, axis=-1)
    loc_loss *= positive.astype(loc_loss.dtype)
    loc_loss = F.sum(loc_loss) / n_positive

    alpha = 0.75
    gamma = 2

    # One-hot encode the labels; class_num includes the background class.
    t = gt_mb_labels.reshape(gt_mb_labels.shape[0] * gt_mb_labels.shape[1], )
    class_num = mb_confs.shape[2]
    t = F.cast(chainer.as_variable(xp.eye(class_num)[t]), loc_loss.dtype)
    t = t.reshape(gt_mb_labels.shape[0], gt_mb_labels.shape[1], class_num)

    p = F.sigmoid(mb_confs)
    # pt = p if t > 0 else 1 - p
    pt = F.where(t.array > 0, p, 1 - p)
    # w = alpha * (1 - pt)^gamma if t > 0 else (1 - alpha) * (1 - pt)^gamma
    w = (1 - pt)**gamma
    w = F.where(t.array > 0, alpha * w, (1 - alpha) * w)

    # Numerically stable binary cross entropy with logits, following the
    # PyTorch implementation of binary_cross_entropy_with_logits:
    # https://pytorch.org/docs/master/_modules/torch/nn/functional.html#binary_cross_entropy_with_logits
    max_val = F.clip(-mb_confs, x_min=0.0, x_max=10.0e+12)
    focal_loss = mb_confs - mb_confs * t + max_val + F.log(
        F.exp(-max_val) + F.exp(-mb_confs - max_val))
    focal_loss = F.sum(focal_loss * w) / n_positive

    return loc_loss, focal_loss
def _check_forward(self, mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
    """Validate ``multibox_loss`` against a box-by-box reference.

    When ``self.variable`` is set the inputs are passed as
    ``chainer.Variable``. The two returned losses must be scalar Variables
    whose dtype matches the corresponding prediction, and must equal the
    reference values to two decimals.
    """
    args = [mb_locs, mb_confs, gt_mb_locs, gt_mb_labels]
    if self.variable:
        args = [chainer.Variable(arg) for arg in args]
    mb_locs, mb_confs, gt_mb_locs, gt_mb_labels = args

    loc_loss, conf_loss = multibox_loss(
        mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k)

    for loss, source in ((loc_loss, mb_locs), (conf_loss, mb_confs)):
        self.assertIsInstance(loss, chainer.Variable)
        self.assertEqual(loss.shape, ())
        self.assertEqual(loss.dtype, source.dtype)

    # Unwrap and move to the host before computing the reference.
    if self.variable:
        args = [arg.array for arg in args]
    mb_locs, mb_confs, gt_mb_locs, gt_mb_labels = [
        cuda.to_cpu(arg) for arg in args]
    loc_loss = cuda.to_cpu(loc_loss.array)
    conf_loss = cuda.to_cpu(conf_loss.array)

    n_positive_total = 0
    expect_loc_loss = 0
    expect_conf_loss = 0
    n_samples, n_boxes = gt_mb_labels.shape[0], gt_mb_labels.shape[1]
    for i in six.moves.xrange(n_samples):
        n_positive = 0
        negatives = []
        for j in six.moves.xrange(n_boxes):
            loc = F.huber_loss(
                mb_locs[np.newaxis, i, j],
                gt_mb_locs[np.newaxis, i, j], 1).array
            conf = F.softmax_cross_entropy(
                mb_confs[np.newaxis, i, j],
                gt_mb_labels[np.newaxis, i, j]).array

            if gt_mb_labels[i, j] > 0:
                n_positive += 1
                expect_loc_loss += loc
                expect_conf_loss += conf
            else:
                negatives.append(conf)

        n_positive_total += n_positive
        if n_positive > 0:
            # Keep only the n_positive * k hardest negatives.
            hardest = sorted(negatives)[-n_positive * k:]
            expect_conf_loss += sum(hardest)

    if n_positive_total == 0:
        expect_loc_loss = 0
        expect_conf_loss = 0
    else:
        expect_loc_loss /= n_positive_total
        expect_conf_loss /= n_positive_total

    np.testing.assert_almost_equal(
        loc_loss, expect_loc_loss, decimal=2)
    np.testing.assert_almost_equal(
        conf_loss, expect_conf_loss, decimal=2)
def calc_loss(self, image_size, predicted_grids, gt_bbox_points, objectness_scores, normalize=True):
    """Compute the bbox regression loss and the objectness loss.

    Predicted boxes are matched to ground-truth boxes by IoU. Matched
    (positive) predictions are regressed towards their ground-truth box
    with a Huber loss (delta 1), and the objectness scores are trained with
    softmax cross entropy.

    Args:
        image_size: Forwarded to ``self.get_corners``.
        predicted_grids: Forwarded to ``self.get_corners``; its dtype is
            used for the early-return value.
        gt_bbox_points: Iterable of ground-truth boxes.
        objectness_scores: Per-prediction objectness scores.
        normalize: Not referenced in this method.

    Returns:
        ``(F.mean(loss), objectness_loss)``.

        NOTE(review): when no ground-truth box was collected, a single
        zero-valued ``Variable`` is returned instead of a pair -- confirm
        that callers handle both shapes.
    """
    predicted_bbox_points = self.get_corners(predicted_grids, image_size, scale_to_image_size=False)

    # 1. transform box coordinates to aabb coordinates for determination of iou
    predicted_bbox_points = predicted_bbox_points[0], predicted_bbox_points[4], predicted_bbox_points[3], predicted_bbox_points[7]
    predicted_bbox_points = F.stack(predicted_bbox_points, axis=1)

    # 2. find best prediction area for each gt bbox
    gt_bboxes_to_use_for_loss = []
    positive_anchor_indices = self.xp.empty((0, ), dtype=self.xp.int32)
    not_contributing_anchors = self.xp.empty((0, ), dtype=self.xp.int32)
    # ``index`` is not used inside the loop.
    for index, gt_bbox in enumerate(gt_bbox_points):
        # determine which bboxes are positive boxes as they have high iou with gt and also which bboxes are negative
        # this is also used to train objectness classification
        # Repeat the gt box once per prediction so the IoU is computed pairwise.
        gt_bbox = self.xp.tile(gt_bbox[None, ...], (len(predicted_bbox_points), 1))

        ious = bbox_iou(gt_bbox, predicted_bbox_points.data)
        # IoU >= 0.7 is positive; IoU strictly between 0.3 and 0.7 is ignored.
        positive_boxes = self.xp.where((ious[0] >= 0.7))
        not_contributing_boxes = self.xp.where(
            self.xp.logical_and(0.3 < ious[0], ious[0] < 0.7))
        if len(positive_boxes[0]) == 0:
            # No prediction is good enough: fall back to the best-IoU one.
            best_iou_index = ious[0, :].argmax()
            positive_anchor_indices = self.xp.concatenate(
                (positive_anchor_indices, best_iou_index[None, ...]), axis=0)
            gt_bboxes_to_use_for_loss.append(gt_bbox[0])
        else:
            positive_anchor_indices = self.xp.concatenate(
                (positive_anchor_indices, positive_boxes[0]), axis=0)
            # One copy of the gt box per positive prediction.
            gt_bboxes_to_use_for_loss.extend(
                gt_bbox[:len(positive_boxes[0])])
        not_contributing_anchors = self.xp.concatenate(
            (not_contributing_anchors, not_contributing_boxes[0]), axis=0)

    if len(gt_bboxes_to_use_for_loss) == 0:
        return Variable(self.xp.array(0, dtype=predicted_grids.dtype))

    gt_bboxes_to_use_for_loss = F.stack(gt_bboxes_to_use_for_loss)

    # filter predicted bboxes and only keep bboxes from those regions that actually contain a bbox
    predicted_bbox_points = F.get_item(predicted_bbox_points, positive_anchor_indices)

    # 3. calculate Huber loss (delta 1) for bbox regression
    loss = F.huber_loss(predicted_bbox_points, gt_bboxes_to_use_for_loss, 1)

    # 4. calculate objectness loss
    # Labels: 0 = negative, 1 = positive, -1 = ignored. Positives are written
    # last, so they win over an "ignored" mark on the same index.
    objectness_labels = self.xp.zeros(len(objectness_scores), dtype=self.xp.int32)
    objectness_labels[not_contributing_anchors] = -1
    objectness_labels[positive_anchor_indices] = 1

    objectness_loss = F.softmax_cross_entropy(
        objectness_scores,
        objectness_labels,
        ignore_label=-1,
    )

    return F.mean(loss), objectness_loss
def mean_clipped_loss(y, t):
    """Return the summed Huber loss (delta 1.0) divided by the batch size."""
    # F.huber_loss only accepts arrays with ndim >= 2, so append an axis.
    y_expanded = F.expand_dims(y, axis=-1)
    t_expanded = F.expand_dims(t, axis=-1)

    total = F.sum(F.huber_loss(y_expanded, t_expanded, 1.0))
    return total / y_expanded.shape[0]
def __call__(self, x, img_info, gt_boxes=None):
    """Faster RCNN forward

    Depending on the training flags this returns one of three things:
    the RPN output (``self.rpn_train`` with ground truth), the RCNN loss
    (``self.rcnn_train`` with ground truth), or the class probabilities
    together with the predicted boxes.

    Args:
        x (:class:`~chainer.Variable`): The input image. Note that the
            batchsize should be 1. So the shape should be
            :math:`(1, n_channels, height, width)`.
        img_info (:class:`~chainer.Variable`): The input image info. It
            contains :math:`(height, width)` and the batchsize should be
            1. So the shape should be :math:`(1, 2)`.
        gt_boxes (:class:`~chainer.Variable`): The ground truth bounding
            boxes and its class label array. The shape should be
            :math:`(1, n_gt_boxes, 5)` and the batchsize should be 1.

    """
    if self.type_check_enable:
        self._check_data_type_forward(x, img_info, gt_boxes)

    # Use the array module of the backend of trunk model
    with cuda.get_device_from_array(x.data):
        xp, feature_map = self.trunk.xp, self.trunk(x)

        # RPN training mode
        if self.rpn_train and gt_boxes is not None:
            return self.RPN(feature_map, img_info, gt_boxes)
        else:
            proposals, probs = self.RPN(feature_map, img_info, gt_boxes)
            # Kept on the instance so they can be inspected after the call.
            self.rpn_proposals = proposals
            self.rpn_probs = probs

        # RCNN
        # Prepend a batch index column (all zeros) to every proposal.
        batch_id = xp.zeros((len(proposals), 1), dtype=xp.float32)
        brois = xp.concatenate((batch_id, proposals), axis=1)
        pool5 = F.roi_pooling_2d(feature_map, brois, 7, 7,
                                 self._spatial_scale)
        fc6 = F.dropout(F.relu(self.fc6(pool5)), train=self.rcnn_train)
        fc7 = F.dropout(F.relu(self.fc7(fc6)), train=self.rcnn_train)

        # Per class probability
        cls_score = self.cls_score(fc7)

        # BBox predictions
        bbox_pred = self.bbox_pred(fc7)

        if self.rcnn_train and gt_boxes is not None:
            # Create proposal target layer if it does not exist yet
            if not hasattr(self, 'proposal_target_layer'):
                self.proposal_target_layer = ProposalTargetLayer(
                    self._feat_stride, self._anchor_ratios,
                    self._anchor_scales, self._num_classes)
            use_gt_boxes, bbox_reg_targets, keep_inds = \
                self.proposal_target_layer(proposals, gt_boxes)

            # TODO(mitmul): Remove this re-sending below vars to GPU
            xp = self.RPN.xp
            if xp is cuda.cupy:
                use_gt_boxes = xp.asarray(use_gt_boxes)
                bbox_reg_targets = xp.asarray(bbox_reg_targets)
                keep_inds = xp.asarray(keep_inds)

            # Select predicted scores and calc loss
            cls_score = cls_score[keep_inds]
            # The class label is the last column of each ground-truth box.
            cls_labels = use_gt_boxes[:, -1].astype(xp.int32)
            loss_cls = F.softmax_cross_entropy(cls_score, cls_labels)
            loss_cls = loss_cls.reshape(())
            cls_acc = F.accuracy(cls_score, cls_labels, -1)

            # Select predicted bbox transformations and calc loss
            bbox_pred = bbox_pred[keep_inds]
            loss_bbox = F.huber_loss(bbox_pred, bbox_reg_targets,
                                     self._rcnn_delta)
            # Average the per-row Huber loss over the number of rows.
            loss_bbox = F.sum(loss_bbox) / loss_bbox.size
            loss_bbox = loss_bbox.reshape(())

            loss_rcnn = loss_cls + loss_bbox

            reporter.report({'loss_cls': loss_cls,
                             'cls_accuracy': cls_acc,
                             'loss_bbox': loss_bbox,
                             'loss_rcnn': loss_rcnn}, self)

            return loss_rcnn

        # Inference: decode the predicted deltas and clip to the image.
        pred_boxes = bbox_transform_inv(proposals, bbox_pred.data)
        pred_boxes = clip_boxes(pred_boxes, img_info.data[0])

        return F.softmax(cls_score), pred_boxes
def multibox_loss(mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k,
                  binarize=False, arm_confs=None, arm_locs=None):
    r"""Computes multibox losses.

    Different from :obj:`chainercv.MultiboxCoder`, cascaded offset
    regression, negative anchor filtering and arm binarization loss are
    supported.

    This is a loss function used in [#]_.
    This function returns :obj:`loc_loss` and :obj:`conf_loss`.
    :obj:`loc_loss` is a loss for localization and
    :obj:`conf_loss` is a loss for classification.
    The formulas of these losses can be found in
    the equation (2) and (3) in the original paper.

    .. [#] Shifeng Zhang, Longyin Wen, Xiao Bian, Zhen Lei, Stan Z. Li.
       Single-Shot Refinement Neural Network for Object Detection.

    Args:
        mb_locs (chainer.Variable or array): The offsets and scales
            for predicted bounding boxes.
            Its shape is :math:`(B, K, 4)`,
            where :math:`B` is the number of samples in the batch and
            :math:`K` is the number of default bounding boxes.
        mb_confs (chainer.Variable or array): The classes of predicted
            bounding boxes.
            Its shape is :math:`(B, K, n\_class)`.
            This function assumes the first class is background (negative).
        gt_mb_locs (chainer.Variable or array): The offsets and scales
            for ground truth bounding boxes.
            Its shape is :math:`(B, K, 4)`.
        gt_mb_labels (chainer.Variable or array): The classes of ground truth
            bounding boxes.
            Its shape is :math:`(B, K)`.
        k (float): A coefficient which is used for hard negative mining.
            This value determines the ratio between the number of positives
            and that of mined negatives. The value used in the original paper
            is :obj:`3`.
        binarize (bool): If True, conf loss objective is binarized
            (any class or background).
        arm_confs (chainer.Variable, array or None): If not `None`,
            negative anchor filtering is enabled. Indexes whose objectness
            derived from :obj:`arm_confs` is <= 0.01 are not used for
            training.
        arm_locs (chainer.Variable, array or None): If not `None`,
            cascaded offset regression is enabled.

    Returns:
        tuple of chainer.Variable:
        This function returns two :obj:`chainer.Variable`: :obj:`loc_loss` and
        :obj:`conf_loss`. Both are zero when no positive box remains.
    """
    variance = (0.1, 0.2)

    mb_locs = chainer.as_variable(mb_locs)
    mb_confs = chainer.as_variable(mb_confs)
    gt_mb_locs = chainer.as_variable(gt_mb_locs)
    gt_mb_labels = chainer.as_variable(gt_mb_labels)

    xp = chainer.cuda.get_array_module(gt_mb_labels.array)

    if arm_locs is not None:
        # Work on a detached copy so no gradient flows into the ARM output.
        if isinstance(arm_locs, chainer.Variable):
            arm_locs = arm_locs.array.copy()
        else:
            arm_locs = arm_locs.copy()

        # Cascaded regression: refine the ARM offsets with the new ones.
        w_offset = arm_locs[:, :, 2:] + mb_locs[:, :, 2:]
        x_offset = xp.exp(arm_locs[:, :, 2:] * variance[1]) * mb_locs[:, :, :2]
        x_offset += arm_locs[:, :, :2]
        mb_locs = F.dstack((x_offset, w_offset))

    # Label 0 is background, so anything above it is a positive box.
    positive = gt_mb_labels.array > 0
    n_positive = positive.sum()

    if n_positive == 0:
        z = chainer.Variable(xp.zeros((), dtype=np.float32))
        return z, z

    loc_loss = F.huber_loss(mb_locs, gt_mb_locs, 1, reduce='no')

    if arm_confs is not None:
        # The check used to look at ``arm_locs``, so a Variable passed as
        # ``arm_confs`` was never unwrapped.
        if isinstance(arm_confs, chainer.Variable):
            arm_confs = arm_confs.array.copy()
        else:
            arm_confs = arm_confs.copy()

        # Binarise the objectness with a 0.01 threshold.
        objectness = xp.exp(arm_confs)
        negativeness = xp.exp(1 - arm_confs)
        objectness /= objectness + negativeness
        objectness[objectness <= 0.01] = 0
        objectness[objectness > 0.01] = 1
        objectness = objectness.reshape(
            objectness.shape[0], objectness.shape[1])

        n_positive = (positive * objectness).sum()
        if n_positive == 0:
            # Filtering removed every positive; avoid dividing by zero.
            z = chainer.Variable(xp.zeros((), dtype=np.float32))
            return z, z
    else:
        objectness = None

    loc_loss = F.sum(loc_loss, axis=-1)
    loc_loss *= positive.astype(loc_loss.dtype)
    if objectness is not None:
        loc_loss *= objectness.astype(loc_loss.dtype)
    loc_loss = F.sum(loc_loss) / n_positive

    conf_loss = _elementwise_softmax_cross_entropy(
        mb_confs, gt_mb_labels, binarize)
    # Hard negatives are mined against the unfiltered positives.
    hard_negative = _hard_negative(conf_loss.array, positive, k, objectness)
    if arm_confs is not None:
        positive *= objectness.astype(positive.dtype)
    conf_loss *= xp.logical_or(
        positive, hard_negative).astype(conf_loss.dtype)
    conf_loss = F.sum(conf_loss) / n_positive

    return loc_loss, conf_loss
def smooth_l1(x, t, beta):
    """Return the element-wise Huber loss with delta ``beta``, over ``beta``."""
    huber = F.huber_loss(x, t, beta, reduce='no')
    return huber / beta
def compute_loss(self, s, a, r, new_s, done, loss_log=False):
    """Compute the Q-learning Huber loss, optionally with a penalty term.

    Args:
        s: Batch of current states.
        a: Batch of actions taken.
        r: Batch of rewards.
        new_s: Batch of next states.
        done: Batch of episode-termination flags; ``1.0 - done`` masks the
            bootstrapped value.
        loss_log: When true, the penalty is computed and
            ``(loss, penalty)`` is returned without combining them.

    Returns:
        ``loss``, or ``(loss, penalty)`` when ``loss_log`` is true. In
        ``"regularize"`` mode the weighted penalty is added to the loss
        only when it exceeds ``self.threshold``.
    """
    if self.net_type == "full":
        # Flatten each state for the fully-connected network.
        s = s.reshape(self.batch_size, self.input_slides*self.size*self.size)
        new_s = new_s.reshape(self.batch_size, self.input_slides*self.size*self.size)

    # gpu
    if self.gpu >= 0:
        s = cuda.to_gpu(s)
        new_s = cuda.to_gpu(new_s)

    # NOTE(review): this compares version strings lexicographically, so
    # e.g. "10.0.0" would sort before "2.0.0" -- confirm this is acceptable.
    if chainer.__version__ >= "2.0.0":
        s = Variable(s)
        new_s = Variable(new_s)
    else:
        s = Variable(s, volatile='auto')
        new_s = Variable(new_s, volatile='auto')

    q_value = self.q(s)

    # The bootstrap target must not receive gradients.
    # NOTE(review): tg_q_value stays unbound if self.mode is none of the
    # three values below.
    with chainer.no_backprop_mode():
        if self.mode == "regularize":
            tg_q_value = self.q(new_s)
        elif self.mode == "target_mix":
            tg_q_value = (1.0-self.mix_rate) * self.q(new_s) + self.mix_rate * self.fixed_q(new_s)
        elif self.mode == "default":
            tg_q_value = self.fixed_q(new_s)

    if self.gpu >= 0:
        a = cuda.to_gpu(a)
        r = cuda.to_gpu(r)
        done = cuda.to_gpu(done)

    if chainer.__version__ >= "2.0.0":
        a = Variable(a)
    else:
        a = Variable(a, volatile='auto')

    argmax_a = F.argmax(tg_q_value, axis=1)

    q_action_value = F.select_item(q_value, a)
    target = r + self.discount * (1.0 - done) * F.select_item(tg_q_value, argmax_a)
    # target is float32

    # F.huber_loss is applied to column vectors here.
    q_action_value = F.reshape(q_action_value, (-1, 1))
    target = F.reshape(target, (-1, 1))

    loss_sum = F.sum(F.huber_loss(q_action_value, target, delta=1.0))
    loss = loss_sum / q_action_value.shape[0]

    if self.mode == "regularize" or loss_log == True:
        # Pick the pair (y, t) to be pulled together: y comes from the
        # live network, t from the fixed network without gradients.
        # NOTE(review): y/t stay unbound if penalty_function matches none
        # of the three options; likewise penalty for penalty_type.
        if self.penalty_function == "value":
            y = q_value
            with chainer.no_backprop_mode():
                t = self.fixed_q(s)
        if self.penalty_function == "action_value":
            y = q_action_value
            with chainer.no_backprop_mode():
                t = F.select_item(self.fixed_q(s), a)
                t = F.reshape(t, (-1, 1))
        if self.penalty_function == "max_action_value":
            y = F.select_item(self.q(new_s), argmax_a)
            y = F.reshape(y, (-1, 1))
            with chainer.no_backprop_mode():
                t = F.select_item(self.fixed_q(new_s), argmax_a)
                t = F.reshape(t, (-1, 1))

        if self.penalty_type == "huber":
            if self.final_penalty_cut == 1:
                # Drop the penalty on terminal transitions.
                penalty_sum = F.sum((1.0 - done)*F.huber_loss(y, t, delta=1.0))
            else:
                penalty_sum = F.sum(F.huber_loss(y, t, delta=1.0))
            penalty = penalty_sum / (y.shape[0]*y.shape[1])
        if self.penalty_type == "mean_squared":
            penalty = F.mean_squared_error(y, t)

        if loss_log == True:
            return loss, penalty

        # Apply the penalty only once it exceeds the threshold.
        if penalty.data > self.threshold:
            loss = loss + self.penalty_weight * penalty

    return loss
def mean_clipped_loss(y, t):
    """Average the un-reduced Huber loss (delta 1.0) between y and t."""
    per_element = F.huber_loss(y, t, delta=1.0, reduce='no')
    return F.mean(per_element)
def check_invalid_option(self, xp):
    """Check that ``huber_loss`` rejects ``'invalid_option'`` with ValueError.

    Args:
        xp: Array module used to convert ``self.x`` and ``self.t``.
    """
    x_array = xp.asarray(self.x)
    t_array = xp.asarray(self.t)

    with self.assertRaises(ValueError):
        functions.huber_loss(x_array, t_array, 1, 'invalid_option')
def loss(self, pred_y, true_y):
    """Return the mean element-wise Huber loss (delta 1.0).

    ``true_y`` is cast with ``astype('f')`` before the comparison.
    """
    targets = true_y.astype('f')
    per_element = F.huber_loss(pred_y, targets, 1.0, reduce='no')
    return F.mean(per_element)