def add_cascade_fast_rcnn_losses(model, stage_num):
    """Add losses for RoI classification and bounding box regression for one
    Cascade R-CNN stage.

    Args:
        model: Detectron model helper; used to create net ops and to register
            losses/metrics.
        stage_num: cascade stage index; must be 1, 2, or 3.

    Returns:
        Dict of loss gradients from blob_utils.get_loss_gradients.

    Raises:
        ValueError: if stage_num is not 1, 2, or 3 (previously this fell
            through to an UnboundLocalError at the return statement).
    """
    # Per-stage blob-name suffix and loss weight. The three stages were
    # previously three copy-pasted branches differing only in these values.
    stage_params = {
        1: ('1st', cfg.CASCADERCNN.WEIGHT_LOSS_BBOX_STAGE1),
        2: ('2nd', cfg.CASCADERCNN.WEIGHT_LOSS_BBOX_STAGE2),
        3: ('3rd', cfg.CASCADERCNN.WEIGHT_LOSS_BBOX_STAGE3),
    }
    if stage_num not in stage_params:
        raise ValueError(
            'stage_num must be 1, 2, or 3; got {}'.format(stage_num)
        )
    suffix, stage_weight = stage_params[stage_num]
    # NOTE(review): the stage weight (named WEIGHT_LOSS_BBOX_*) is applied to
    # BOTH the classification and the box regression losses, exactly as in the
    # original per-stage branches — confirm the config name is just historical.
    scale = model.GetLossScale() * stage_weight

    cls_prob, loss_cls = model.net.SoftmaxWithLoss(
        ['cls_score_' + suffix, 'labels_int32_' + suffix],
        ['cls_prob_' + suffix, 'loss_cls_' + suffix],
        scale=scale
    )
    loss_bbox = model.net.SmoothL1Loss(
        [
            'bbox_pred_' + suffix,
            'bbox_targets_' + suffix,
            'bbox_inside_weights_' + suffix,
            'bbox_outside_weights_' + suffix
        ],
        'loss_bbox_' + suffix,
        scale=scale
    )
    loss_gradients = blob_utils.get_loss_gradients(
        model, [loss_cls, loss_bbox]
    )
    model.Accuracy(
        ['cls_prob_' + suffix, 'labels_int32_' + suffix],
        'accuracy_cls_' + suffix
    )
    model.AddLosses(['loss_cls_' + suffix, 'loss_bbox_' + suffix])
    model.AddMetrics('accuracy_cls_' + suffix)
    return loss_gradients
def add_fpn_retinanet_losses(model):
    """Add RetinaNet losses over all FPN levels: a SelectSmoothL1 box
    regression loss plus a focal classification loss (sigmoid or softmax
    variant depending on cfg.RETINANET.SOFTMAX) per level.

    Returns a dict of loss gradients.
    """
    loss_gradients = {}
    grad_blobs = []
    loss_blobs = []
    lvl_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    lvl_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    model.AddMetrics(['retnet_fg_num', 'retnet_bg_num'])

    # --------------------------------------------------------------------
    # Box regression loss: SelectSmoothL1Loss handles the multiple anchors
    # predicted at each spatial location.
    # --------------------------------------------------------------------
    for lvl in range(lvl_min, lvl_max + 1):
        suffix = 'fpn{}'.format(lvl)
        bbox_loss = model.net.SelectSmoothL1Loss(
            [
                'retnet_bbox_pred_' + suffix,
                'retnet_roi_bbox_targets_' + suffix,
                'retnet_roi_fg_bbox_locs_' + suffix,
                'retnet_fg_num',
            ],
            'retnet_loss_bbox_' + suffix,
            beta=cfg.RETINANET.BBOX_REG_BETA,
            scale=model.GetLossScale() * cfg.RETINANET.BBOX_REG_WEIGHT,
        )
        grad_blobs.append(bbox_loss)
        loss_blobs.append('retnet_loss_bbox_' + suffix)

    # --------------------------------------------------------------------
    # Classification loss: focal loss on the class logits of each level.
    # --------------------------------------------------------------------
    for lvl in range(lvl_min, lvl_max + 1):
        suffix = 'fpn{}'.format(lvl)
        logits_blob = 'retnet_cls_pred_' + suffix
        fl_blob = 'fl_{}'.format(suffix)
        if cfg.RETINANET.SOFTMAX:
            # Softmax variant keeps a background class, hence num_classes.
            focal_loss, _gated_prob = model.net.SoftmaxFocalLoss(
                [logits_blob, 'retnet_cls_labels_' + suffix, 'retnet_fg_num'],
                [fl_blob, 'retnet_prob_{}'.format(suffix)],
                gamma=cfg.RETINANET.LOSS_GAMMA,
                alpha=cfg.RETINANET.LOSS_ALPHA,
                scale=model.GetLossScale(),
                num_classes=model.num_classes,
            )
        else:
            # Sigmoid variant has no explicit background class (hence -1).
            focal_loss = model.net.SigmoidFocalLoss(
                [logits_blob, 'retnet_cls_labels_' + suffix, 'retnet_fg_num'],
                [fl_blob],
                gamma=cfg.RETINANET.LOSS_GAMMA,
                alpha=cfg.RETINANET.LOSS_ALPHA,
                scale=model.GetLossScale(),
                num_classes=model.num_classes - 1,
            )
        grad_blobs.append(focal_loss)
        loss_blobs.append(fl_blob)

    loss_gradients.update(blob_utils.get_loss_gradients(model, grad_blobs))
    model.AddLosses(loss_blobs)
    return loss_gradients
def add_mask_rcnn_losses(model, blob_mask):
    """Add the Mask R-CNN mask loss: per-pixel sigmoid cross entropy between
    the predicted mask logits (blob_mask) and the int32 mask targets.

    Returns a dict of loss gradients.
    """
    mask_loss_scale = model.GetLossScale() * cfg.MRCNN.WEIGHT_LOSS_MASK
    loss_mask = model.net.SigmoidCrossEntropyLoss(
        [blob_mask, 'masks_int32'],
        'loss_mask',
        scale=mask_loss_scale,
    )
    loss_gradients = blob_utils.get_loss_gradients(model, [loss_mask])
    model.AddLosses('loss_mask')
    return loss_gradients
def add_keypoint_losses(model): """Add Mask R-CNN keypoint specific losses.""" # Reshape input from (N, K, H, W) to (NK, HW) model.net.Reshape( ['kps_score'], ['kps_score_reshaped', '_kps_score_old_shape'], shape=(-1, cfg.KRCNN.HEATMAP_SIZE * cfg.KRCNN.HEATMAP_SIZE) ) # Softmax across **space** (woahh....space!) # Note: this is not what is commonly called "spatial softmax" # (i.e., softmax applied along the channel dimension at each spatial # location); This is softmax applied over a set of spatial locations (i.e., # each spatial location is a "class"). kps_prob, loss_kps = model.net.SoftmaxWithLoss( ['kps_score_reshaped', 'keypoint_locations_int32', 'keypoint_weights'], ['kps_prob', 'loss_kps'], scale=cfg.KRCNN.LOSS_WEIGHT / cfg.NUM_GPUS, spatial=0 ) if not cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS: # Discussion: the softmax loss above will average the loss by the sum of # keypoint_weights, i.e. the total number of visible keypoints. Since # the number of visible keypoints can vary significantly between # minibatches, this has the effect of up-weighting the importance of # minibatches with few visible keypoints. (Imagine the extreme case of # only one visible keypoint versus N: in the case of N, each one # contributes 1/N to the gradient compared to the single keypoint # determining the gradient direction). Instead, we can normalize the # loss by the total number of keypoints, if it were the case that all # keypoints were visible in a full minibatch. (Returning to the example, # this means that the one visible keypoint contributes as much as each # of the N keypoints.) model.StopGradient( 'keypoint_loss_normalizer', 'keypoint_loss_normalizer' ) loss_kps = model.net.Mul( ['loss_kps', 'keypoint_loss_normalizer'], 'loss_kps_normalized' ) loss_gradients = blob_utils.get_loss_gradients(model, [loss_kps]) model.AddLosses(loss_kps) return loss_gradients
def add_single_scale_rpn_losses(model):
    """Add losses for a single scale RPN model (i.e., no FPN).

    Narrows the full-sized RPN label/target blobs to the feature-map extent,
    then adds the objectness (sigmoid cross entropy) and box regression
    (smooth L1) losses. Returns a dict of loss gradients.
    """
    # Spatially narrow the full-sized RPN label arrays to match the feature
    # map shape.
    model.net.SpatialNarrowAs(
        ['rpn_labels_int32_wide', 'rpn_cls_logits'], 'rpn_labels_int32'
    )
    for blob_suffix in ('targets', 'inside_weights', 'outside_weights'):
        wide_name = 'rpn_bbox_{}_wide'.format(blob_suffix)
        model.net.SpatialNarrowAs(
            [wide_name, 'rpn_bbox_pred'], 'rpn_bbox_' + blob_suffix
        )

    loss_rpn_cls = model.net.SigmoidCrossEntropyLoss(
        ['rpn_cls_logits', 'rpn_labels_int32'],
        'loss_rpn_cls',
        scale=model.GetLossScale()
    )
    loss_rpn_bbox = model.net.SmoothL1Loss(
        [
            'rpn_bbox_pred',
            'rpn_bbox_targets',
            'rpn_bbox_inside_weights',
            'rpn_bbox_outside_weights',
        ],
        'loss_rpn_bbox',
        beta=1. / 9.,
        scale=model.GetLossScale()
    )
    loss_gradients = blob_utils.get_loss_gradients(
        model, [loss_rpn_cls, loss_rpn_bbox]
    )
    model.AddLosses(['loss_rpn_cls', 'loss_rpn_bbox'])
    return loss_gradients