def __call__(self, pred_cls_list, pred_reg_list, anchors_list, gt_boxes, im_info):
    """Compute RetinaNet training losses: sigmoid focal loss for classification
    and smooth-L1 for box regression, both normalized by an EMA of the
    positive-anchor count.

    Returns:
        dict with keys 'retina_focal_loss' and 'retina_smooth_l1'.
    """
    # Flatten per-level predictions and anchors into single tensors.
    anchors = torch.cat(anchors_list, axis=0)
    cls_scores = torch.sigmoid(
        torch.cat(pred_cls_list, axis=1).reshape(-1, config.num_classes - 1))
    reg_deltas = torch.cat(pred_reg_list, axis=1).reshape(-1, 4)

    # Assign ground-truth labels and regression targets to every anchor.
    labels, bbox_target = retina_anchor_target(anchors, gt_boxes, im_info, top_k=1)

    fg_mask = (labels > 0).flatten()      # positives drive the regression loss
    valid_mask = (labels >= 0).flatten()  # label < 0 means "ignore"

    loss_reg = smooth_l1_loss(reg_deltas[fg_mask],
                              bbox_target[fg_mask],
                              config.smooth_l1_beta)
    loss_cls = focal_loss(cls_scores[valid_mask],
                          labels[valid_mask],
                          config.focal_loss_alpha,
                          config.focal_loss_gamma)

    # EMA over the positive count keeps the normalizer stable across batches;
    # max(..., 1) guards the all-background case.
    num_pos_anchors = fg_mask.sum().item()
    self.loss_normalizer = self.loss_normalizer_momentum * self.loss_normalizer + (
        1 - self.loss_normalizer_momentum) * max(num_pos_anchors, 1)

    return {
        'retina_focal_loss': loss_cls.sum() / self.loss_normalizer,
        'retina_smooth_l1': loss_reg.sum() / self.loss_normalizer,
    }
def forward(self, features, im_info, boxes=None):
    """RPN head forward pass.

    Args:
        features: list of FPN feature maps ordered p6 -> p2 (strides 64..4
            — per the original in-code comment; confirm against the FPN).
        im_info: per-image meta information tensor.
        boxes: ground-truth boxes, required only in training mode.

    Returns:
        (rpn_rois, loss_dict) when training, else (rpn_rois, shapes).
    """
    # Per-level objectness scores and box deltas from a shared 3x3 conv.
    pred_cls_score_list = []
    pred_bbox_offsets_list = []
    batch = features[0].shape[0]
    for x in features:
        t = F.relu(self.rpn_conv(x))
        pred_cls_score_list.append(self.rpn_cls_score(t))
        pred_bbox_offsets_list.append(self.rpn_bbox_offsets(t))

    # Generate anchors per level; stride 64,32,16,8,4 for p6 -> p2.
    all_anchors_list = []
    base_stride = 4
    off_stride = 2 ** (len(features) - 1)  # 16 when there are 5 levels
    for fm in features:
        layer_anchors = self.anchors_generator(fm, base_stride, off_stride)
        off_stride = off_stride // 2
        all_anchors_list.append(layer_anchors)

    # Keep the top-scoring proposals across all levels.
    rpn_rois, shapes = find_top_rpn_proposals(
        self.training, pred_bbox_offsets_list, pred_cls_score_list,
        all_anchors_list, im_info, batch)
    rpn_rois = rpn_rois.type_as(features[0])

    if not self.training:
        return rpn_rois, shapes

    rpn_labels, rpn_bbox_targets = fpn_anchor_target(
        boxes, im_info, all_anchors_list)
    pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape(
        pred_cls_score_list, pred_bbox_offsets_list)

    # RPN losses. Label convention: <0 ignore, 0 background, >0 foreground.
    valid_masks = rpn_labels >= 0
    objectness_loss = softmax_loss(
        pred_cls_score[valid_masks],
        rpn_labels[valid_masks])

    pos_masks = rpn_labels > 0
    localization_loss = smooth_l1_loss(
        pred_bbox_offsets[pos_masks],
        rpn_bbox_targets[pos_masks],
        config.rpn_smooth_l1_beta)
    # max(..., 1) guards against an all-ignored batch, which would otherwise
    # raise ZeroDivisionError (matches the RetinaNet loss normalizer guard).
    normalizer = 1 / max(valid_masks.sum().item(), 1)
    loss_dict = {}
    loss_dict['loss_rpn_cls'] = objectness_loss.sum() * normalizer
    loss_dict['loss_rpn_loc'] = localization_loss.sum() * normalizer
    return rpn_rois, loss_dict
def forward(self, fpn_fms, proposals, labels=None, bbox_targets=None):
    """Cascade RCNN stage head: ROIAlign pooling + 2 FC layers, then
    class scores and a single set of box deltas per roi.

    Args:
        fpn_fms: FPN feature maps; p2-p5 are used (p6 dropped).
        proposals: rois with batch index in column 0, box in columns 1:5.
        labels, bbox_targets: training targets (label <0 ignore, 0 bg, >0 fg).

    Returns:
        (pred_proposals, loss_dict) when training,
        (pred_proposals, pred_scores) otherwise.
    """
    # Use p2-p5, reversed so the order matches stride = [4, 8, 16, 32].
    fpn_fms = fpn_fms[1:][::-1]
    stride = [4, 8, 16, 32]
    pool_features = roi_pooler(fpn_fms, proposals, stride, (7, 7), "ROIAlignV2")
    flatten_feature = torch.flatten(pool_features, start_dim=1)
    flatten_feature = F.relu_(self.fc1(flatten_feature))
    flatten_feature = F.relu_(self.fc2(flatten_feature))
    pred_cls = self.pred_cls(flatten_feature)
    pred_delta = self.pred_delta(flatten_feature)

    # Refined proposals for the next stage, batch index preserved in column 0.
    # Computed once for both branches; no gradient flows through them.
    with torch.no_grad():
        pred_bbox = restore_bbox(proposals[:, 1:5], pred_delta, True).detach()
        pred_proposals = torch.cat(
            [proposals[:, 0].reshape(-1, 1), pred_bbox], axis=1)
        #pred_proposals = batch_clip_boxes_opr(pred_proposals, im_info)

    if self.training:
        labels = labels.long().flatten()
        fg_masks = labels > 0       # only foreground rois get a regression loss
        valid_masks = labels >= 0   # label <0 means "ignore"
        localization_loss = smooth_l1_loss(pred_delta[fg_masks],
                                           bbox_targets[fg_masks],
                                           config.rcnn_smooth_l1_beta)
        objectness_loss = softmax_loss(pred_cls, labels)
        objectness_loss = objectness_loss * valid_masks  # zero out ignored rois
        # max(..., 1) guards against an all-ignored batch, which would
        # otherwise raise ZeroDivisionError.
        normalizer = 1.0 / max(valid_masks.sum().item(), 1)
        loss_dict = {}
        loss_dict[self.stage_name + '_loc'] = localization_loss.sum() * normalizer
        loss_dict[self.stage_name + '_cls'] = objectness_loss.sum() * normalizer
        return pred_proposals, loss_dict
    else:
        # Single foreground score per roi (binary head: column 1).
        pred_scores = F.softmax(pred_cls, dim=-1)[:, 1].reshape(-1, 1)
        return pred_proposals, pred_scores
def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
    """RCNN head with class-specific box regression.

    Args:
        fpn_fms: FPN feature maps; p2-p5 are used (p6 dropped).
        rcnn_rois: rois with batch index in column 0, box in columns 1:5.
        labels, bbox_targets: training targets (label <0 ignore, 0 bg, >0 fg).

    Returns:
        loss_dict when training; otherwise an (R * num_fg_classes, 6) tensor
        of [x1, y1, x2, y2, score, class_tag] rows.
    """
    # Use p2-p5, reversed so the order matches stride = [4, 8, 16, 32].
    fpn_fms = fpn_fms[1:][::-1]
    stride = [4, 8, 16, 32]
    pool_features = roi_pooler(fpn_fms, rcnn_rois, stride, (7, 7), "ROIAlignV2")
    flatten_feature = torch.flatten(pool_features, start_dim=1)
    flatten_feature = F.relu_(self.fc1(flatten_feature))
    flatten_feature = F.relu_(self.fc2(flatten_feature))
    pred_cls = self.pred_cls(flatten_feature)
    pred_delta = self.pred_delta(flatten_feature)

    if self.training:
        labels = labels.long().flatten()
        fg_masks = labels > 0       # only foreground rois get a regression loss
        valid_masks = labels >= 0   # label <0 means "ignore"
        # Class-specific regression: select each foreground roi's delta for
        # its ground-truth class.
        pred_delta = pred_delta.reshape(-1, config.num_classes, 4)
        fg_gt_classes = labels[fg_masks]
        pred_delta = pred_delta[fg_masks, fg_gt_classes, :]
        localization_loss = smooth_l1_loss(pred_delta,
                                           bbox_targets[fg_masks],
                                           config.rcnn_smooth_l1_beta)
        objectness_loss = softmax_loss(pred_cls, labels)
        objectness_loss = objectness_loss * valid_masks  # zero out ignored rois
        # max(..., 1) guards against an all-ignored batch, which would
        # otherwise raise ZeroDivisionError.
        normalizer = 1.0 / max(valid_masks.sum().item(), 1)
        loss_dict = {}
        loss_dict['loss_rcnn_loc'] = localization_loss.sum() * normalizer
        loss_dict['loss_rcnn_cls'] = objectness_loss.sum() * normalizer
        return loss_dict
    else:
        # Expand every roi into one row per foreground class, tagged with its
        # 1-based class id so downstream NMS can run per class.
        class_num = pred_cls.shape[-1] - 1
        tag = torch.arange(class_num).type_as(pred_cls) + 1
        tag = tag.repeat(pred_cls.shape[0], 1).reshape(-1, 1)
        pred_scores = F.softmax(pred_cls, dim=-1)[:, 1:].reshape(-1, 1)
        pred_delta = pred_delta[:, 4:].reshape(-1, 4)  # drop background deltas
        base_rois = rcnn_rois[:, 1:5].repeat(1, class_num).reshape(-1, 4)
        pred_bbox = restore_bbox(base_rois, pred_delta, True)
        pred_bbox = torch.cat([pred_bbox, pred_scores, tag], axis=1)
        return pred_bbox