def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, stage=0):
    """Fast R-CNN classification + bbox regression losses, Cascade-R-CNN aware.

    Args:
        cls_score: per-RoI classification logits (CUDA tensor).
        bbox_pred: per-RoI bbox regression deltas.
        label_int32: numpy int array of ground-truth class labels per RoI.
        bbox_targets / bbox_inside_weights / bbox_outside_weights: numpy arrays
            consumed by net_utils.smooth_l1_loss.
        stage: cascade stage index; selects the per-stage loss weight when
            cfg.CASCADE_RCNN.SCALE_LOSS is enabled.

    Returns:
        (loss_cls, loss_bbox, accuracy_cls)
    """
    device_id = cls_score.get_device()
    rois_label = Variable(torch.from_numpy(label_int32.astype('int64'))).cuda(device_id)
    # Per-stage scaling of both loss terms (Cascade R-CNN); 1.0 when disabled.
    if cfg.CASCADE_RCNN.SCALE_LOSS:
        loss_scalar = cfg.CASCADE_RCNN.STAGE_WEIGHTS[stage]
    else:
        loss_scalar = 1.0
    if not cfg.FAST_RCNN.FOCAL_LOSS:
        loss_cls = F.cross_entropy(cls_score, rois_label) * loss_scalar
    else:
        # Focal-loss alpha: cfg.FAST_RCNN.ALPHA for every foreground class,
        # (1 - ALPHA) for background (class 0).
        alpha = torch.ones((cls_score.shape[1], 1)) * cfg.FAST_RCNN.ALPHA
        alpha[0] = 1 - cfg.FAST_RCNN.ALPHA
        # NOTE(review): alpha stays on CPU here — presumably FocalLoss moves it
        # to the score's device internally; confirm against the FocalLoss impl.
        focalloss = FocalLoss(class_num=cls_score.shape[1], alpha=alpha, gamma=cfg.FAST_RCNN.GAMMA)
        loss_cls = focalloss(cls_score, rois_label) * loss_scalar
    bbox_targets = Variable(torch.from_numpy(bbox_targets)).cuda(device_id)
    bbox_inside_weights = Variable(torch.from_numpy(bbox_inside_weights)).cuda(device_id)
    bbox_outside_weights = Variable(torch.from_numpy(bbox_outside_weights)).cuda(device_id)
    loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights) * loss_scalar
    # class accuracy
    cls_preds = cls_score.max(dim=1)[1].type_as(rois_label)
    accuracy_cls = cls_preds.eq(rois_label).float().mean(dim=0)
    return loss_cls, loss_bbox, accuracy_cls
def fpn_rpn_losses(**kwargs):
    """Add RPN on FPN specific losses.

    Expects per-level kwargs named 'rpn_cls_logits_fpn<lvl>',
    'rpn_bbox_pred_fpn<lvl>' and the matching '*_wide_fpn<lvl>' label/target
    blobs for every level in [RPN_MIN_LEVEL, RPN_MAX_LEVEL].

    Returns:
        (cls_losses, bbox_losses) concatenated across levels via torch.cat.
        NOTE(review): torch.cat only works here if each per-level loss is a
        1-element tensor (old-PyTorch reduction semantics) — confirm against
        the torch version this repo pins.
    """
    losses_cls = []
    losses_bbox = []
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        slvl = str(lvl)
        # Spatially narrow the full-sized RPN label arrays to match the feature map shape
        h, w = kwargs['rpn_cls_logits_fpn' + slvl].shape[2:]
        rpn_labels_int32_fpn = kwargs['rpn_labels_int32_wide_fpn' + slvl][:, :, :h, :w]
        h, w = kwargs['rpn_bbox_pred_fpn' + slvl].shape[2:]
        rpn_bbox_targets_fpn = kwargs['rpn_bbox_targets_wide_fpn' + slvl][:, :, :h, :w]
        rpn_bbox_inside_weights_fpn = kwargs[
            'rpn_bbox_inside_weights_wide_fpn' + slvl][:, :, :h, :w]
        rpn_bbox_outside_weights_fpn = kwargs[
            'rpn_bbox_outside_weights_wide_fpn' + slvl][:, :, :h, :w]
        # Labels of -1 mean "ignore"; the weight mask zeroes them out of the BCE.
        weight = (rpn_labels_int32_fpn >= 0).float()
        # size_average=False is the deprecated spelling of reduction='sum'.
        loss_rpn_cls_fpn = F.binary_cross_entropy_with_logits(
            kwargs['rpn_cls_logits_fpn' + slvl], rpn_labels_int32_fpn.float(), weight,
            size_average=False)
        loss_rpn_cls_fpn /= cfg.TRAIN.RPN_BATCH_SIZE_PER_IM * cfg.TRAIN.IMS_PER_BATCH
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH
        loss_rpn_bbox_fpn = net_utils.smooth_l1_loss(
            kwargs['rpn_bbox_pred_fpn' + slvl], rpn_bbox_targets_fpn,
            rpn_bbox_inside_weights_fpn, rpn_bbox_outside_weights_fpn, beta=1/9)
        losses_cls.append(loss_rpn_cls_fpn)
        losses_bbox.append(loss_rpn_bbox_fpn)
    return torch.cat(losses_cls), torch.cat(losses_bbox)
def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Fast R-CNN losses with selectable bbox loss (smooth-L1 / IoU / GIoU).

    Returns (loss_cls, loss_bbox, accuracy_cls, sl1_loss_bbox, iou_loss_bbox,
    giou_loss_bbox); all three bbox variants are always computed so they can
    be logged, while cfg.MODEL.LOSS_TYPE picks the one that is optimized.
    """
    dev = cls_score.get_device()

    def to_gpu_var(arr):
        # numpy -> Variable on the same CUDA device as the scores.
        return Variable(torch.from_numpy(arr)).cuda(dev)

    rois_label = to_gpu_var(label_int32.astype('int64'))
    loss_cls = F.cross_entropy(cls_score, rois_label)

    bbox_targets = to_gpu_var(bbox_targets)
    bbox_inside_weights = to_gpu_var(bbox_inside_weights)
    bbox_outside_weights = to_gpu_var(bbox_outside_weights)

    sl1_loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights)
    iou_loss_bbox, giou_loss_bbox = net_utils.compute_iou(
        bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights,
        transform_weights=cfg.MODEL.BBOX_REG_WEIGHTS)

    # Dispatch on the configured loss type.
    by_type = {
        'smooth_l1': sl1_loss_bbox,
        'iou': iou_loss_bbox,
        'giou': giou_loss_bbox,
    }
    if cfg.MODEL.LOSS_TYPE not in by_type:
        raise ValueError('Invalid loss type: ' + cfg.MODEL.LOSS_TYPE)
    loss_bbox = by_type[cfg.MODEL.LOSS_TYPE]

    # class accuracy
    accuracy_cls = cls_score.max(dim=1)[1].type_as(rois_label).eq(rois_label).float().mean(dim=0)
    return loss_cls, loss_bbox, accuracy_cls, sl1_loss_bbox, iou_loss_bbox, giou_loss_bbox
def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Fast R-CNN losses with optional fine-tuning class weights for the CE term.

    Returns (loss_cls, loss_bbox, accuracy_cls).
    """
    dev = cls_score.get_device()

    def npy_var(arr):
        # numpy -> Variable on the score's CUDA device.
        return Variable(torch.from_numpy(arr)).cuda(dev)

    rois_label = npy_var(label_int32.astype('int64'))

    # Optional per-class CE weights from the config. (The 'WIGHT' spelling is
    # the actual cfg key name and must not be corrected here.)
    if len(cfg.TRAIN.CE_FINETUNE_WIGHT):
        ce_weight = Variable(
            torch.from_numpy(np.array(cfg.TRAIN.CE_FINETUNE_WIGHT)).float()).cuda(dev)
        loss_cls = F.cross_entropy(cls_score, rois_label, ce_weight)
    else:
        loss_cls = F.cross_entropy(cls_score, rois_label)

    loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, npy_var(bbox_targets),
        npy_var(bbox_inside_weights), npy_var(bbox_outside_weights))

    # class accuracy
    accuracy_cls = cls_score.max(dim=1)[1].type_as(rois_label).eq(rois_label).float().mean(dim=0)
    return loss_cls, loss_bbox, accuracy_cls
def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Fast R-CNN losses; optionally down-weights background in the CE term.

    Returns (loss_cls, loss_bbox, accuracy_cls).
    """
    dev = cls_score.get_device()

    def npy_var(arr):
        # numpy -> Variable on the score's CUDA device.
        return Variable(torch.from_numpy(arr)).cuda(dev)

    rois_label = npy_var(label_int32.astype('int64'))

    if cfg.FAST_RCNN.USE_CLS_WEIGHT:
        # Background (class 0) contributes 0.2x to the CE loss; all others 1x.
        cls_weight = torch.ones(cls_score.size(1), dtype=torch.float32, device=dev)
        cls_weight[0] = 0.2
        loss_cls = F.cross_entropy(cls_score, rois_label, weight=cls_weight)
    else:
        loss_cls = F.cross_entropy(cls_score, rois_label)

    loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, npy_var(bbox_targets),
        npy_var(bbox_inside_weights), npy_var(bbox_outside_weights))

    # class accuracy
    accuracy_cls = cls_score.max(dim=1)[1].type_as(rois_label).eq(rois_label).float().mean(dim=0)
    return loss_cls, loss_bbox, accuracy_cls
def single_scale_rpn_losses(
        rpn_cls_logits, rpn_bbox_pred,
        rpn_labels_int32_wide, rpn_bbox_targets_wide,
        rpn_bbox_inside_weights_wide, rpn_bbox_outside_weights_wide):
    """Add losses for a single scale RPN model (i.e., no FPN).

    The '*_wide' blobs are allocated at the maximum feature-map size; they are
    cropped here to the actual (h, w) of the logits / bbox predictions.
    Returns (loss_rpn_cls, loss_rpn_bbox).
    """
    h, w = rpn_cls_logits.shape[2:]
    rpn_labels_int32 = rpn_labels_int32_wide[:, :, :h, :w]  # -1 means ignore
    h, w = rpn_bbox_pred.shape[2:]
    rpn_bbox_targets = rpn_bbox_targets_wide[:, :, :h, :w]
    rpn_bbox_inside_weights = rpn_bbox_inside_weights_wide[:, :, :h, :w]
    rpn_bbox_outside_weights = rpn_bbox_outside_weights_wide[:, :, :h, :w]
    if cfg.RPN.CLS_ACTIVATION == 'softmax':
        # Reshape (B, 2*A, H, W) logits into (B*A*H*W, 2) two-class rows.
        B, C, H, W = rpn_cls_logits.size()
        rpn_cls_logits = rpn_cls_logits.view(
            B, 2, C // 2, H, W).permute(0, 2, 3, 4, 1).contiguous().view(-1, 2)
        rpn_labels_int32 = rpn_labels_int32.contiguous().view(-1).long()
        # the loss is averaged over non-ignored targets
        loss_rpn_cls = F.cross_entropy(
            rpn_cls_logits, rpn_labels_int32, ignore_index=-1)
    else:
        # Sigmoid activation: mask out ignored (-1) labels via the BCE weight,
        # sum, then average over the number of valid anchors.
        weight = (rpn_labels_int32 >= 0).float()
        loss_rpn_cls = F.binary_cross_entropy_with_logits(
            rpn_cls_logits, rpn_labels_int32.float(), weight, size_average=False)
        loss_rpn_cls /= weight.sum()
    loss_rpn_bbox = net_utils.smooth_l1_loss(
        rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights, beta=1/9)
    return loss_rpn_cls, loss_rpn_bbox
def single_scale_rpn_losses(
        rpn_cls_logits, rpn_bbox_pred,
        rpn_labels_int32_wide, rpn_bbox_targets_wide,
        rpn_bbox_inside_weights_wide, rpn_bbox_outside_weights_wide):
    """Add losses for a single scale RPN model (i.e., no FPN).

    NOTE(review): this definition is byte-identical to the preceding
    single_scale_rpn_losses and shadows it at import time — one of the two
    copies should probably be deleted.
    Returns (loss_rpn_cls, loss_rpn_bbox).
    """
    h, w = rpn_cls_logits.shape[2:]
    rpn_labels_int32 = rpn_labels_int32_wide[:, :, :h, :w]  # -1 means ignore
    h, w = rpn_bbox_pred.shape[2:]
    rpn_bbox_targets = rpn_bbox_targets_wide[:, :, :h, :w]
    rpn_bbox_inside_weights = rpn_bbox_inside_weights_wide[:, :, :h, :w]
    rpn_bbox_outside_weights = rpn_bbox_outside_weights_wide[:, :, :h, :w]
    if cfg.RPN.CLS_ACTIVATION == 'softmax':
        # Reshape (B, 2*A, H, W) logits into (B*A*H*W, 2) two-class rows.
        B, C, H, W = rpn_cls_logits.size()
        rpn_cls_logits = rpn_cls_logits.view(
            B, 2, C // 2, H, W).permute(0, 2, 3, 4, 1).contiguous().view(-1, 2)
        rpn_labels_int32 = rpn_labels_int32.contiguous().view(-1).long()
        # the loss is averaged over non-ignored targets
        loss_rpn_cls = F.cross_entropy(
            rpn_cls_logits, rpn_labels_int32, ignore_index=-1)
    else:
        # Sigmoid: ignored (-1) labels are masked out via the BCE weight.
        weight = (rpn_labels_int32 >= 0).float()
        loss_rpn_cls = F.binary_cross_entropy_with_logits(
            rpn_cls_logits, rpn_labels_int32.float(), weight, size_average=False)
        loss_rpn_cls /= weight.sum()
    loss_rpn_bbox = net_utils.smooth_l1_loss(
        rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights, beta=1/9)
    return loss_rpn_cls, loss_rpn_bbox
def fast_rcnn_amodal_losses(cls_score, amodal_score, bbox_pred, label_int32, label_amodal_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Fast R-CNN losses plus an extra amodal classification head.

    Returns (loss_cls, loss_amodal, loss_bbox, accuracy_cls, accuracy_amodal).
    """
    dev = cls_score.get_device()

    def npy_var(arr):
        # numpy -> Variable on the score's CUDA device.
        return Variable(torch.from_numpy(arr)).cuda(dev)

    def head_accuracy(scores, labels):
        # fraction of RoIs whose argmax class matches the label
        preds = scores.max(dim=1)[1].type_as(labels)
        return preds.eq(labels).float().mean(dim=0)

    rois_label = npy_var(label_int32.astype('int64'))
    rois_amodal_label = npy_var(label_amodal_int32.astype('int64'))

    loss_cls = F.cross_entropy(cls_score, rois_label)
    loss_amodal = F.cross_entropy(amodal_score, rois_amodal_label)

    loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, npy_var(bbox_targets),
        npy_var(bbox_inside_weights), npy_var(bbox_outside_weights))

    accuracy_cls = head_accuracy(cls_score, rois_label)
    accuracy_amodal = head_accuracy(amodal_score, rois_amodal_label)
    return loss_cls, loss_amodal, loss_bbox, accuracy_cls, accuracy_amodal
def fpn_rpn_losses(**kwargs):
    """Add RPN on FPN specific losses.

    Variant with selectable RPN bbox loss (smooth-L1 / IoU / GIoU) chosen by
    cfg.MODEL.RPN_LOSS_TYPE; returns per-level lists (losses_cls, losses_bbox),
    the bbox terms scaled by cfg.MODEL.RPN_LOSS_BBOX_WEIGHT.
    """
    losses_cls = []
    losses_bbox = []
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        slvl = str(lvl)
        # Spatially narrow the full-sized RPN label arrays to match the feature map shape
        b, c, h, w = kwargs['rpn_cls_logits_fpn' + slvl].shape
        rpn_labels_int32_fpn = kwargs['rpn_labels_int32_wide_fpn' + slvl][:, :, :h, :w]
        h, w = kwargs['rpn_bbox_pred_fpn' + slvl].shape[2:]
        rpn_bbox_targets_fpn = kwargs['rpn_bbox_targets_wide_fpn' + slvl][:, :, :h, :w]
        rpn_bbox_inside_weights_fpn = kwargs[
            'rpn_bbox_inside_weights_wide_fpn' + slvl][:, :, :h, :w]
        rpn_bbox_outside_weights_fpn = kwargs[
            'rpn_bbox_outside_weights_wide_fpn' + slvl][:, :, :h, :w]
        if cfg.RPN.CLS_ACTIVATION == 'softmax':
            # (B, 2*A, H, W) -> (B*A*H*W, 2) two-class rows; labels of -1 are ignored.
            rpn_cls_logits_fpn = kwargs['rpn_cls_logits_fpn' + slvl].view(
                b, 2, c // 2, h, w).permute(0, 2, 3, 4, 1).contiguous().view(-1, 2)
            rpn_labels_int32_fpn = rpn_labels_int32_fpn.contiguous().view(-1).long()
            # the loss is averaged over non-ignored targets
            loss_rpn_cls_fpn = F.cross_entropy(
                rpn_cls_logits_fpn, rpn_labels_int32_fpn, ignore_index=-1)
        else:  # sigmoid
            weight = (rpn_labels_int32_fpn >= 0).float()
            loss_rpn_cls_fpn = F.binary_cross_entropy_with_logits(
                kwargs['rpn_cls_logits_fpn' + slvl], rpn_labels_int32_fpn.float(), weight,
                size_average=False)
            loss_rpn_cls_fpn /= cfg.TRAIN.RPN_BATCH_SIZE_PER_IM * cfg.TRAIN.IMS_PER_BATCH
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH
        sl1_loss_rpn_bbox_fpn = net_utils.smooth_l1_loss(
            kwargs['rpn_bbox_pred_fpn' + slvl], rpn_bbox_targets_fpn,
            rpn_bbox_inside_weights_fpn, rpn_bbox_outside_weights_fpn, beta=1/9)
        # For IoU-style losses, flatten (B, 4*A, H, W) into rows of 4 deltas.
        iou_loss_rpn_bbox_fpn, giou_loss_rpn_bbox_fpn = net_utils.compute_iou(
            kwargs['rpn_bbox_pred_fpn' + slvl].permute(0, 2, 3, 1).reshape(-1, 4),
            rpn_bbox_targets_fpn.permute(0, 2, 3, 1).reshape(-1, 4),
            rpn_bbox_inside_weights_fpn.permute(0, 2, 3, 1).reshape(-1, 4),
            rpn_bbox_outside_weights_fpn.permute(0, 2, 3, 1).reshape(-1, 4),
            batch_size=cfg.TRAIN.IMS_PER_BATCH)
        if cfg.MODEL.RPN_LOSS_TYPE == 'smooth_l1':
            loss_rpn_bbox_fpn = sl1_loss_rpn_bbox_fpn
        elif cfg.MODEL.RPN_LOSS_TYPE == 'iou':
            loss_rpn_bbox_fpn = iou_loss_rpn_bbox_fpn
        elif cfg.MODEL.RPN_LOSS_TYPE == 'giou':
            loss_rpn_bbox_fpn = giou_loss_rpn_bbox_fpn
        else:
            raise ValueError('Invalid loss type: ' + cfg.MODEL.RPN_LOSS_TYPE)
        losses_cls.append(loss_rpn_cls_fpn)
        losses_bbox.append(loss_rpn_bbox_fpn * cfg.MODEL.RPN_LOSS_BBOX_WEIGHT)
    return losses_cls, losses_bbox
def fast_rcnn_losses(cls_score, bbox_pred, labels, cls_loss_weights, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Per-RoI weighted Fast R-CNN losses.

    The classification loss is the negative log-probability of each RoI's
    label, scaled per-RoI by cls_loss_weights and averaged; the same weights
    are forwarded into the smooth-L1 bbox loss.
    Returns (cls_loss, bbox_loss).
    """
    log_probs = F.log_softmax(cls_score, dim=1)
    # Pick each row's log-probability at its ground-truth label.
    picked = log_probs[range(cls_score.size(0)), labels].view(-1)
    cls_loss = -(picked * cls_loss_weights).mean()
    bbox_loss = net_utils.smooth_l1_loss(
        bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights,
        cls_loss_weights)
    return cls_loss, bbox_loss
def fpn_rpn_losses(**kwargs):
    """Add RPN on FPN specific losses.

    Variant with optional cfg.DEBUG instrumentation that counts positive RPN
    anchors across all levels. Returns per-level lists (losses_cls, losses_bbox).
    """
    losses_cls = []
    losses_bbox = []
    if cfg.DEBUG:
        print('='*60)
        count = 0  # running count of positive (label == 1) anchors
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        slvl = str(lvl)
        # Spatially narrow the full-sized RPN label arrays to match the feature map shape
        b, c, h, w = kwargs['rpn_cls_logits_fpn' + slvl].shape
        rpn_labels_int32_fpn = kwargs['rpn_labels_int32_wide_fpn' + slvl][:, :, :h, :w]
        if cfg.DEBUG:
            count += (rpn_labels_int32_fpn==1).data.sum()
            #print('rpn positive num for %s is:' % slvl, ((rpn_labels_int32_fpn==1).data.sum()))
        h, w = kwargs['rpn_bbox_pred_fpn' + slvl].shape[2:]
        rpn_bbox_targets_fpn = kwargs['rpn_bbox_targets_wide_fpn' + slvl][:, :, :h, :w]
        rpn_bbox_inside_weights_fpn = kwargs[
            'rpn_bbox_inside_weights_wide_fpn' + slvl][:, :, :h, :w]
        rpn_bbox_outside_weights_fpn = kwargs[
            'rpn_bbox_outside_weights_wide_fpn' + slvl][:, :, :h, :w]
        if cfg.RPN.CLS_ACTIVATION == 'softmax':
            # (B, 2*A, H, W) -> (B*A*H*W, 2) two-class rows; labels of -1 are ignored.
            rpn_cls_logits_fpn = kwargs['rpn_cls_logits_fpn' + slvl].view(
                b, 2, c // 2, h, w).permute(0, 2, 3, 4, 1).contiguous().view(-1, 2)
            rpn_labels_int32_fpn = rpn_labels_int32_fpn.contiguous().view(-1).long()
            # the loss is averaged over non-ignored targets
            loss_rpn_cls_fpn = F.cross_entropy(
                rpn_cls_logits_fpn, rpn_labels_int32_fpn, ignore_index=-1)
        else:  # sigmoid
            weight = (rpn_labels_int32_fpn >= 0).float()
            loss_rpn_cls_fpn = F.binary_cross_entropy_with_logits(
                kwargs['rpn_cls_logits_fpn' + slvl], rpn_labels_int32_fpn.float(), weight,
                size_average=False)
            loss_rpn_cls_fpn /= cfg.TRAIN.RPN_BATCH_SIZE_PER_IM * cfg.TRAIN.IMS_PER_BATCH
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH
        loss_rpn_bbox_fpn = net_utils.smooth_l1_loss(
            kwargs['rpn_bbox_pred_fpn' + slvl], rpn_bbox_targets_fpn,
            rpn_bbox_inside_weights_fpn, rpn_bbox_outside_weights_fpn, beta=1/9)
        losses_cls.append(loss_rpn_cls_fpn)
        losses_bbox.append(loss_rpn_bbox_fpn)
    if cfg.DEBUG:
        print('rpn positive num is: %d' % count)
    return losses_cls, losses_bbox
def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Fast R-CNN losses with selectable classification loss.

    cfg.FAST_RCNN.LOSS_TYPE selects among 'cross_entropy', 'triplet_softmax',
    'multi_margin' and 'max_margin'. Under cfg.MODEL.TAGGING, background
    (label 0) is ignored for the CE loss / accuracy and the bbox loss is
    zeroed. Returns (loss_cls, loss_bbox, accuracy_cls).
    """
    device_id = cls_score.get_device()
    rois_label = Variable(torch.from_numpy(label_int32.astype('int64'))).cuda(device_id)
    if cfg.FAST_RCNN.LOSS_TYPE in ['cross_entropy', 'triplet_softmax']:
        if cfg.FAST_RCNN.LOSS_TYPE == 'triplet_softmax':
            # Temperature-scale the similarities before softmax (undone below).
            cls_score = cls_score * 3  # This method is borrowed from ji zhang's large scale relationship detection
        if not cfg.MODEL.TAGGING:
            loss_cls = F.cross_entropy(cls_score, rois_label)
        else:
            loss_cls = F.cross_entropy(cls_score, rois_label, ignore_index=0)
        if cfg.FAST_RCNN.LOSS_TYPE == 'triplet_softmax':
            # Undo the x3 so downstream accuracy/similarity use raw scores.
            cls_score = cls_score / 3
    else:
        if cfg.FAST_RCNN.LOSS_TYPE == 'multi_margin':
            loss_cls = F.multi_margin_loss(cls_score, rois_label, margin=cfg.FAST_RCNN.MARGIN, reduction='none')
        elif cfg.FAST_RCNN.LOSS_TYPE == 'max_margin':
            # Force the target column to the top of the sort so column 0 of
            # the sorted indices is the target itself.
            cls_score_with_high_target = cls_score.clone()
            cls_score_with_high_target.scatter_(1, rois_label.view(-1, 1), 1e10)
            # This make sure the following variable always has the target in the first column
            target_and_offender_index = cls_score_with_high_target.sort(1, True)[1][:, :2]  # Target and the largest score excpet target
            loss_cls = F.multi_margin_loss(cls_score.gather(1, target_and_offender_index), rois_label.data * 0, margin=cfg.FAST_RCNN.MARGIN, reduction='none')
        # Margin losses: keep only foreground RoIs, then average (0 if none).
        loss_cls = loss_cls[rois_label > 0]
        loss_cls = loss_cls.mean() if loss_cls.numel() > 0 else loss_cls.new_tensor(0)
    # Secretly log the mean similarity!
    if cfg.FAST_RCNN.LOSS_TYPE in ['triplet_softmax', 'max_margin', 'multi_margin']:
        # NOTE: stashing an ad-hoc attribute on the loss tensor for logging.
        loss_cls.mean_similarity = cls_score[rois_label>0].gather(1, rois_label[rois_label>0].unsqueeze(1)).mean().detach() / 3
    bbox_targets = Variable(torch.from_numpy(bbox_targets)).cuda(device_id)
    bbox_inside_weights = Variable(torch.from_numpy(bbox_inside_weights)).cuda(device_id)
    bbox_outside_weights = Variable(torch.from_numpy(bbox_outside_weights)).cuda(device_id)
    loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights)
    if cfg.MODEL.TAGGING:
        # Tagging mode does not regress boxes; keep a zero tensor so the
        # training loop's bookkeeping still works.
        loss_bbox = torch.zeros_like(loss_bbox)
    # class accuracy
    cls_preds = cls_score.max(dim=1)[1].type_as(rois_label)
    if not cfg.MODEL.TAGGING:
        accuracy_cls = cls_preds.eq(rois_label).float().mean(dim=0)
    else:
        accuracy_cls = cls_preds[rois_label > 0].eq(rois_label[rois_label > 0]).float().mean(dim=0)  # Ignore index 0
    return loss_cls, loss_bbox, accuracy_cls
def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Minimal Fast R-CNN losses: softmax CE + smooth-L1 bbox regression.

    Returns (loss_cls, loss_bbox).
    """
    dev = cls_score.get_device()

    def npy_var(arr):
        # numpy -> Variable on the score's CUDA device.
        return Variable(torch.from_numpy(arr)).cuda(dev)

    rois_label = npy_var(label_int32.astype('int64'))
    loss_cls = F.cross_entropy(cls_score, rois_label)
    loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, npy_var(bbox_targets),
        npy_var(bbox_inside_weights), npy_var(bbox_outside_weights))
    return loss_cls, loss_bbox
def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Fast R-CNN losses plus classification accuracy.

    Returns (loss_cls, loss_bbox, accuracy_cls).
    """
    dev = cls_score.get_device()

    def npy_var(arr):
        # numpy -> Variable on the score's CUDA device.
        return Variable(torch.from_numpy(arr)).cuda(dev)

    rois_label = npy_var(label_int32.astype('int64'))
    loss_cls = F.cross_entropy(cls_score, rois_label)
    loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, npy_var(bbox_targets),
        npy_var(bbox_inside_weights), npy_var(bbox_outside_weights))
    # class accuracy: fraction of RoIs whose argmax matches the label
    accuracy_cls = cls_score.max(dim=1)[1].type_as(rois_label).eq(rois_label).float().mean(dim=0)
    return loss_cls, loss_bbox, accuracy_cls
def position_losses(position_cls_pred, position_reg_pred, roidb,
                    position_inside_weights=np.array((1.0), dtype=np.float32),
                    position_outside_weights=np.array((1.0), dtype=np.float32)):
    """Z-position classification + regression losses (one target per roidb entry).

    The normalized z position is both regressed directly and quantized into 3
    classes via fixed bin edges. Returns (cls_loss, reg_loss, accuracy_cls).
    """
    device_id = position_cls_pred.get_device()
    position_reg_targets = np.zeros((len(roidb)))
    position_cls_targets = np.zeros((len(roidb)))
    # Bin edges quantizing the normalized z position into 3 classes.
    position_cls_bins = np.array((0.58, 0.72, 1))
    for idx, entry in enumerate(roidb):
        position_reg_targets[idx] = entry['z_position']
        position_cls_targets[idx] = np.digitize(entry['z_position'], position_cls_bins)
    # note: only support multi-modal now.
    # Replicate targets x3, one copy per modality.
    if cfg.LESION.MULTI_MODALITY:
        position_cls_targets = np.tile(position_cls_targets, (3))
        position_reg_targets = np.tile(position_reg_targets, (3))
    # Bug fix: these were previously unbound, raising NameError at the return
    # whenever USE_CLS or USE_REG was disabled. Default to zeros instead.
    cls_loss = Variable(torch.zeros(1)).cuda(device_id)
    reg_loss = Variable(torch.zeros(1)).cuda(device_id)
    accuracy_cls = Variable(torch.zeros(1)).cuda(device_id)
    if USE_CLS:
        position_cls_targets = Variable(
            torch.from_numpy(position_cls_targets.astype('int64'))).cuda(device_id)
        # Classification term is down-weighted by 0.10 relative to regression.
        cls_loss = 0.10 * F.cross_entropy(position_cls_pred, position_cls_targets)
        cls_preds = position_cls_pred.max(dim=1)[1].type_as(position_cls_targets)
        accuracy_cls = cls_preds.eq(position_cls_targets).float().mean(dim=0)
    if USE_REG:
        position_reg_targets = Variable(
            torch.from_numpy(position_reg_targets.astype(np.float32))).cuda(device_id)
        position_inside_weights = Variable(
            torch.from_numpy(position_inside_weights)).cuda(device_id)
        position_outside_weights = Variable(
            torch.from_numpy(position_outside_weights)).cuda(device_id)
        reg_loss = net_utils.smooth_l1_loss(position_reg_pred, position_reg_targets,
                                            position_inside_weights,
                                            position_outside_weights)
    return cls_loss, reg_loss, accuracy_cls
def fpn_rpn_losses(**kwargs):
    """Add RPN on FPN specific losses.

    Expects 'rpn_cls_logits_fpn<lvl>', 'rpn_bbox_pred_fpn<lvl>' and the
    matching '*_wide_fpn<lvl>' blobs for each level in
    [RPN_MIN_LEVEL, RPN_MAX_LEVEL]. Returns per-level lists
    (losses_cls, losses_bbox).
    """
    losses_cls = []
    losses_bbox = []
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        sfx = str(lvl)
        cls_logits = kwargs['rpn_cls_logits_fpn' + sfx]
        bbox_pred = kwargs['rpn_bbox_pred_fpn' + sfx]
        # Narrow the full-sized ("wide") label/target blobs to the actual
        # feature-map extent at this level.
        b, c, h, w = cls_logits.shape
        labels = kwargs['rpn_labels_int32_wide_fpn' + sfx][:, :, :h, :w]
        h, w = bbox_pred.shape[2:]
        bbox_targets = kwargs['rpn_bbox_targets_wide_fpn' + sfx][:, :, :h, :w]
        inside_w = kwargs['rpn_bbox_inside_weights_wide_fpn' + sfx][:, :, :h, :w]
        outside_w = kwargs['rpn_bbox_outside_weights_wide_fpn' + sfx][:, :, :h, :w]

        if cfg.RPN.CLS_ACTIVATION == 'softmax':
            # (B, 2*A, H, W) -> (B*A*H*W, 2) rows; -1 labels are ignored, so
            # the CE is averaged over non-ignored targets only.
            two_class = cls_logits.view(b, 2, c // 2, h, w).permute(
                0, 2, 3, 4, 1).contiguous().view(-1, 2)
            flat_labels = labels.contiguous().view(-1).long()
            loss_cls = F.cross_entropy(two_class, flat_labels, ignore_index=-1)
        else:  # sigmoid
            valid = (labels >= 0).float()
            loss_cls = F.binary_cross_entropy_with_logits(
                cls_logits, labels.float(), valid, size_average=False)
            loss_cls /= cfg.TRAIN.RPN_BATCH_SIZE_PER_IM * cfg.TRAIN.IMS_PER_BATCH

        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH.
        loss_bbox = net_utils.smooth_l1_loss(
            bbox_pred, bbox_targets, inside_w, outside_w, beta=1/9)

        losses_cls.append(loss_cls)
        losses_bbox.append(loss_bbox)
    return losses_cls, losses_bbox
def light_head_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Light-Head R-CNN losses: softmax CE + smooth-L1 bbox, bbox term x5.

    Returns (loss_cls, loss_bbox * 5).
    """
    dev = cls_score.get_device()

    def npy_var(arr):
        # numpy -> Variable on the score's CUDA device.
        return Variable(torch.from_numpy(arr)).cuda(dev)

    rois_label = npy_var(label_int32.astype('int64'))
    loss_cls = F.cross_entropy(cls_score, rois_label)
    loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, npy_var(bbox_targets),
        npy_var(bbox_inside_weights), npy_var(bbox_outside_weights))
    # The regression term is up-weighted by a fixed factor of 5.
    return loss_cls, loss_bbox * 5
def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """Fast R-CNN losses accepting numpy arrays OR torch.Tensors.

    When the model is split into generator/discriminator, DataParallel already
    converts the numpy blobs into Tensors, so each input may arrive as either
    type. The four duplicated isinstance/convert stanzas are factored into a
    single helper. Returns (loss_cls, loss_bbox, accuracy_cls).
    """
    device_id = cls_score.get_device()

    def _as_cuda_var(value, to_long=False):
        # Normalize numpy-or-Tensor input to a CUDA Variable on this device.
        if isinstance(value, torch.Tensor):
            t = value.long() if to_long else value
        else:
            t = torch.from_numpy(value.astype('int64') if to_long else value)
        return Variable(t).cuda(device_id)

    rois_label = _as_cuda_var(label_int32, to_long=True)
    loss_cls = F.cross_entropy(cls_score, rois_label)

    bbox_targets = _as_cuda_var(bbox_targets)
    bbox_inside_weights = _as_cuda_var(bbox_inside_weights)
    bbox_outside_weights = _as_cuda_var(bbox_outside_weights)
    loss_bbox = net_utils.smooth_l1_loss(bbox_pred, bbox_targets,
                                         bbox_inside_weights,
                                         bbox_outside_weights)

    # class accuracy
    cls_preds = cls_score.max(dim=1)[1].type_as(rois_label)
    accuracy_cls = cls_preds.eq(rois_label).float().mean(dim=0)
    return loss_cls, loss_bbox, accuracy_cls
def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets,
                     bbox_inside_weights, bbox_outside_weights,
                     cls_weights=None, id_score=None, id_int32=None):
    """Fast R-CNN losses with an optional identity-classification branch.

    When cfg.MODEL.IDENTITY_TRAINING is on, id_score/id_int32 must be given
    and the return grows to (loss_cls, loss_bbox, accuracy_cls, loss_id,
    accuracy_id); otherwise (loss_cls, loss_bbox, accuracy_cls).
    """
    dev = cls_score.get_device()

    def npy_var(arr):
        # numpy -> Variable on the score's CUDA device.
        return Variable(torch.from_numpy(arr)).cuda(dev)

    rois_label = npy_var(label_int32.astype('int64'))
    loss_cls = F.cross_entropy(cls_score, rois_label, weight=cls_weights)

    loss_bbox = net_utils.smooth_l1_loss(
        bbox_pred, npy_var(bbox_targets),
        npy_var(bbox_inside_weights), npy_var(bbox_outside_weights))

    # class accuracy
    accuracy_cls = cls_score.max(dim=1)[1].type_as(rois_label).eq(rois_label).float().mean(dim=0)

    if not cfg.MODEL.IDENTITY_TRAINING:
        return loss_cls, loss_bbox, accuracy_cls

    # identity loss and accuracy
    assert (not id_score is None and not id_int32 is None)
    id_label = npy_var(id_int32.astype('int64'))
    loss_id = F.cross_entropy(id_score, id_label)
    id_preds = id_score.max(dim=1)[1].type_as(rois_label)
    accuracy_id = id_preds.eq(id_label).float().mean(dim=0)
    return loss_cls, loss_bbox, accuracy_cls, loss_id, accuracy_id
def position_rcnn_losses(position_cls_pred, position_reg_pred, roidb,
                         position_inside_weights=np.array((1.0), dtype=np.float32),
                         position_outside_weights=np.array((0.01), dtype=np.float32)):
    """Z-position losses with per-RoI targets (RPN_BATCH_SIZE_PER_IM per image).

    Each image's z_position is broadcast to all of its sampled RoIs, then
    classified into 3 bins and regressed. Returns
    (cls_loss, reg_loss, accuracy_cls).
    """
    bs = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM
    device_id = position_cls_pred.get_device()
    position_reg_targets = np.zeros((len(roidb) * bs))
    position_cls_targets = np.zeros((len(roidb) * bs))
    # Bin edges quantizing the normalized z position into 3 classes.
    position_cls_bins = np.array((0.58, 0.72, 1))
    for idx, entry in enumerate(roidb):
        # Broadcast the image-level z target to its bs RoIs.
        position_reg_targets[idx * bs:(idx + 1) * bs] = entry['z_position']
        position_cls_targets[idx * bs:(idx + 1) * bs] = np.digitize(
            entry['z_position'], position_cls_bins)
    # Bug fix: these were previously unbound, raising NameError at the return
    # whenever USE_CLS or USE_REG was disabled. Default to zeros instead.
    cls_loss = Variable(torch.zeros(1)).cuda(device_id)
    reg_loss = Variable(torch.zeros(1)).cuda(device_id)
    accuracy_cls = Variable(torch.zeros(1)).cuda(device_id)
    if USE_CLS:
        position_cls_targets = Variable(
            torch.from_numpy(position_cls_targets.astype('int64'))).cuda(device_id)
        cls_loss = F.cross_entropy(position_cls_pred, position_cls_targets)
        cls_preds = position_cls_pred.max(dim=1)[1].type_as(position_cls_targets)
        accuracy_cls = cls_preds.eq(position_cls_targets).float().mean(dim=0)
        # Debug trace kept from the original implementation.
        print('rcnn position cls acc: ', accuracy_cls, accuracy_cls.cpu())
    if USE_REG:
        position_reg_targets = Variable(
            torch.from_numpy(position_reg_targets.astype(np.float32))).cuda(device_id)
        position_inside_weights = Variable(
            torch.from_numpy(position_inside_weights)).cuda(device_id)
        position_outside_weights = Variable(
            torch.from_numpy(position_outside_weights)).cuda(device_id)
        reg_loss = net_utils.smooth_l1_loss(position_reg_pred, position_reg_targets,
                                            position_inside_weights,
                                            position_outside_weights)
    return cls_loss, reg_loss, accuracy_cls
def fpn_rpn_losses(**kwargs):
    """Add RPN on FPN specific losses.

    Few-shot / query-conditioned variant: kwargs['query_type'] is a per-image
    0/1 indicator. Labels of non-query images are zeroed (treated as
    background) for the cls loss, and only query images (query_type == 1)
    contribute to the bbox loss. Returns per-level lists
    (losses_cls, losses_bbox).
    """
    query_type = kwargs['query_type'].int()
    losses_cls = []
    losses_bbox = []
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        slvl = str(lvl)
        # Spatially narrow the full-sized RPN label arrays to match the feature map shape
        device_id = kwargs['rpn_cls_logits_fpn' + slvl].get_device()
        b, c, h, w = kwargs['rpn_cls_logits_fpn' + slvl].shape
        # Multiplying by query_type zeroes positives of non-query images.
        # NOTE(review): this also turns their ignore labels (-1) into 0 —
        # presumably intentional; confirm against the sampling code.
        rpn_labels_int32_fpn = kwargs['rpn_labels_int32_wide_fpn' + slvl][:, :, :h, :w] * query_type[:, None, None, None]
        h, w = kwargs['rpn_bbox_pred_fpn' + slvl].shape[2:]
        rpn_bbox_targets_fpn = kwargs['rpn_bbox_targets_wide_fpn' + slvl][:, :, :h, :w]
        rpn_bbox_inside_weights_fpn = kwargs['rpn_bbox_inside_weights_wide_fpn' + slvl][:, :, :h, :w]
        rpn_bbox_outside_weights_fpn = kwargs[
            'rpn_bbox_outside_weights_wide_fpn' + slvl][:, :, :h, :w]
        if cfg.RPN.CLS_ACTIVATION == 'softmax':
            # (B, 2*A, H, W) -> (B*A*H*W, 2) rows; -1 labels are ignored.
            rpn_cls_logits_fpn = kwargs['rpn_cls_logits_fpn' + slvl].view(
                b, 2, c // 2, h, w).permute(0, 2, 3, 4, 1).contiguous().view(-1, 2)
            rpn_labels_int32_fpn = rpn_labels_int32_fpn.contiguous().view(
                -1).long()
            # the loss is averaged over non-ignored targets
            loss_rpn_cls_fpn = F.cross_entropy(rpn_cls_logits_fpn,
                                               rpn_labels_int32_fpn,
                                               ignore_index=-1)
        else:  # sigmoid
            weight = (rpn_labels_int32_fpn >= 0).float()
            loss_rpn_cls_fpn = F.binary_cross_entropy_with_logits(
                kwargs['rpn_cls_logits_fpn' + slvl],
                rpn_labels_int32_fpn.float(), weight, size_average=False)
            loss_rpn_cls_fpn /= cfg.TRAIN.RPN_BATCH_SIZE_PER_IM * cfg.TRAIN.IMS_PER_BATCH
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH
        # Keep only query images for the bbox regression term (this mutates
        # the local kwargs dict entry).
        kwargs['rpn_bbox_pred_fpn' + slvl] = kwargs['rpn_bbox_pred_fpn' + slvl][query_type == 1]
        rpn_bbox_targets_fpn = rpn_bbox_targets_fpn[query_type == 1]
        rpn_bbox_inside_weights_fpn = rpn_bbox_inside_weights_fpn[query_type == 1]
        rpn_bbox_outside_weights_fpn = rpn_bbox_outside_weights_fpn[query_type == 1]
        if len(kwargs['rpn_bbox_pred_fpn' + slvl]) > 0:
            loss_rpn_bbox_fpn = net_utils.smooth_l1_loss(
                kwargs['rpn_bbox_pred_fpn' + slvl], rpn_bbox_targets_fpn,
                rpn_bbox_inside_weights_fpn, rpn_bbox_outside_weights_fpn,
                beta=1 / 9)
        else:
            # No query images in the batch: contribute a zero bbox loss.
            loss_rpn_bbox_fpn = Variable(
                torch.tensor(0).float()).cuda(device_id)
        losses_cls.append(loss_rpn_cls_fpn)
        losses_bbox.append(loss_rpn_bbox_fpn)
    return losses_cls, losses_bbox
def dp_uvia_losses(self, U_estimated, V_estimated, Index_UV, Ann_Index,
                   body_uv_X_points, body_uv_Y_points, body_uv_I_points,
                   body_uv_Ind_points, body_uv_U_points, body_uv_V_points,
                   body_uv_point_weights, body_uv_ann_labels,
                   body_uv_ann_weights):
    """Mask R-CNN body uv specific losses.

    Computes DensePose-style losses: point-wise U/V regression, point-wise
    patch-index classification, and a dense annotation-index segmentation
    loss. Returns (loss_Upoints, loss_Vpoints, loss_IndexUVPoints,
    loss_seg_AnnIndex).
    """
    device_id = U_estimated.get_device()
    ## Reshape for GT blobs.
    ## Concat Ind,x,y to get Coordinates blob.
    Coordinates = torch.cat(
        (body_uv_Ind_points.unsqueeze(2), body_uv_X_points.unsqueeze(2),
         body_uv_Y_points.unsqueeze(2)), dim=2)
    ##
    ### Now reshape UV blobs, such that they are 1x1x(196*NumSamples)xNUM_PATCHES
    ## U blob to
    ##
    U_points_reshaped = body_uv_U_points.view(-1, cfg.DANET.NUM_PATCHES + 1, 196)
    U_points_reshaped_transpose = torch.transpose(U_points_reshaped, 1, 2).contiguous()
    U_points = U_points_reshaped_transpose.view(1, 1, -1, cfg.DANET.NUM_PATCHES + 1)
    ## V blob
    ##
    V_points_reshaped = body_uv_V_points.view(-1, cfg.DANET.NUM_PATCHES + 1, 196)
    V_points_reshaped_transpose = torch.transpose(V_points_reshaped, 1, 2).contiguous()
    V_points = V_points_reshaped_transpose.view(1, 1, -1, cfg.DANET.NUM_PATCHES + 1)
    ###
    ## UV weights blob
    ##
    Uv_point_weights_reshaped = body_uv_point_weights.view(-1, cfg.DANET.NUM_PATCHES + 1, 196)
    Uv_point_weights_reshaped_transpose = torch.transpose(Uv_point_weights_reshaped, 1, 2).contiguous()
    Uv_point_weights = Uv_point_weights_reshaped_transpose.view(1, 1, -1, cfg.DANET.NUM_PATCHES + 1)
    #####################
    ### Pool IUV for points via bilinear interpolation.
    # Map pixel (x, y) coords into grid_sample's normalized [-1, 1] range.
    Coordinates[:, :, 1:3] -= cfg.DANET.HEATMAP_SIZE / 2.
    Coordinates[:, :, 1:3] *= 2. / cfg.DANET.HEATMAP_SIZE
    grid = Coordinates[:, :, 1:3].unsqueeze(1)
    interp_U = F.grid_sample(U_estimated, grid)
    interp_U = torch.transpose(interp_U.squeeze(2), 1, 2).contiguous()
    interp_V = F.grid_sample(V_estimated, grid)
    interp_V = torch.transpose(interp_V.squeeze(2), 1, 2).contiguous()
    interp_Index_UV = F.grid_sample(Index_UV, grid)
    interp_Index_UV = torch.transpose(interp_Index_UV.squeeze(2), 1, 2).contiguous()
    interp_Index_UV = interp_Index_UV.view(-1, cfg.DANET.NUM_PATCHES + 1)
    ## Reshape interpolated UV coordinates to apply the loss.
    interp_U_reshaped = interp_U.view(1, 1, -1, cfg.DANET.NUM_PATCHES + 1)
    interp_V_reshaped = interp_V.view(1, 1, -1, cfg.DANET.NUM_PATCHES + 1)
    ###
    ### Do the actual labels here !!!!
    ## The mask segmentation loss (dense)
    num_cls_Index = Ann_Index.size(1)
    Ann_Index_reshaped = Ann_Index.view(-1, num_cls_Index, cfg.DANET.HEATMAP_SIZE ** 2)
    Ann_Index_reshaped = torch.transpose(Ann_Index_reshaped, 1, 2).contiguous().view(-1, num_cls_Index)
    body_uv_ann_labels_reshaped_int = body_uv_ann_labels.to(torch.int64)
    # NOTE(review): body_uv_ann_weights is accepted but never used here —
    # confirm whether the segmentation loss should be weighted by it.
    loss_seg_AnnIndex = F.cross_entropy(Ann_Index_reshaped, body_uv_ann_labels_reshaped_int.view(-1))
    loss_seg_AnnIndex *= cfg.DANET.INDEX_WEIGHTS
    ## Point Patch Index Loss.
    I_points_reshaped = body_uv_I_points.view(-1)
    I_points_reshaped_int = I_points_reshaped.to(torch.int64)
    loss_IndexUVPoints = F.cross_entropy(interp_Index_UV, I_points_reshaped_int)
    loss_IndexUVPoints *= cfg.DANET.PART_WEIGHTS
    ## U and V point losses.
    loss_Upoints = net_utils.smooth_l1_loss(interp_U_reshaped, U_points,
                                            Uv_point_weights, Uv_point_weights)
    loss_Upoints *= cfg.DANET.POINT_REGRESSION_WEIGHTS
    loss_Vpoints = net_utils.smooth_l1_loss(interp_V_reshaped, V_points,
                                            Uv_point_weights, Uv_point_weights)
    loss_Vpoints *= cfg.DANET.POINT_REGRESSION_WEIGHTS
    return loss_Upoints, loss_Vpoints, loss_IndexUVPoints, loss_seg_AnnIndex