def __init__(self, dim_in, spatial_scales):
    """Set up the RPN head that is shared across all FPN levels.

    Args:
        dim_in: Channel count of the incoming feature maps. The hidden conv
            keeps this width, so ``dim_out`` is set equal to ``dim_in``.
        spatial_scales: Sequence of per-level scales. It is read as
            ``spatial_scales[k_max - lvl]``, i.e. entry 0 is paired with the
            coarsest RPN level.
    """
    super().__init__()
    self.dim_in = dim_in
    self.spatial_scales = spatial_scales
    self.dim_out = self.dim_in

    aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
    n_anchors = len(aspect_ratios)

    # The three convs below are shared by every FPN level.
    self.FPN_RPN_conv = nn.Conv2d(dim_in, self.dim_out, 3, 1, 1)

    # Softmax scoring uses two logits per anchor; any other activation uses one.
    if cfg.RPN.CLS_ACTIVATION == 'softmax':
        score_channels = 2 * n_anchors
    else:
        score_channels = n_anchors
    self.FPN_RPN_cls_score = nn.Conv2d(self.dim_out, score_channels, 1, 1, 0)
    self.FPN_RPN_bbox_pred = nn.Conv2d(self.dim_out, 4 * n_anchors, 1, 1, 0)

    coarsest = cfg.FPN.RPN_MAX_LEVEL
    finest = cfg.FPN.RPN_MIN_LEVEL

    # One proposal generator per pyramid level, finest level first.
    self.GenerateProposals_modules = nn.ModuleList()
    for octave, level in enumerate(range(finest, coarsest + 1)):
        # spatial_scales is indexed in reversed (coarsest-first) order.
        level_scale = self.spatial_scales[coarsest - level]
        # Stride doubles per level; the single anchor size doubles with it,
        # starting from RPN_ANCHOR_START_SIZE at the finest level.
        level_anchors = generate_anchors(
            stride=2. ** level,
            sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2. ** octave,),
            aspect_ratios=aspect_ratios,
        )
        self.GenerateProposals_modules.append(
            GenerateProposalsOp(level_anchors, level_scale))

    self.CollectAndDistributeFpnRpnProposals = CollectAndDistributeFpnRpnProposalsOp()

    self._init_weights()
def __init__(self, dim_in, spatial_scales):
    """Build the shared FPN RPN head, with an optional 3D-context input conv.

    Args:
        dim_in: Channel count of one feature map; ``dim_out`` is set to the
            same value.
        spatial_scales: Sequence of per-level scales, read as
            ``spatial_scales[k_max - lvl]`` (entry 0 goes with the coarsest
            RPN level).

    Disabled experiments left by the original author (not active here): a CBAM
    block / separate 1x1 embedding conv in front of ``FPN_RPN_conv``, a
    ``SUM_BEFORE_RPN`` variant, and hand-picked per-level anchor multipliers
    (e.g. ``[0, 0, 1, 2, 4, 8, 16]``) in place of the power-of-two schedule.
    """
    super().__init__()
    self.dim_in = dim_in
    self.spatial_scales = spatial_scales
    self.dim_out = self.dim_in

    ratios = cfg.FPN.RPN_ASPECT_RATIOS
    anchor_count = len(ratios)

    # Hidden conv shared by all FPN levels (RPN 3D-context test, 12/25).
    if not cfg.LESION.CONCAT_BEFORE_RPN:
        # Plain head: 3x3 conv, stride 1, padding 1.
        self.FPN_RPN_conv = nn.Conv2d(self.dim_in, self.dim_out, 3, 1, 1)
    else:
        # Input is NUM_IMAGES_3DCE times wider (presumably the per-slice
        # features concatenated along channels); a 1x1 conv maps it back
        # to dim_in channels.
        stacked_channels = self.dim_in * cfg.LESION.NUM_IMAGES_3DCE
        self.FPN_RPN_conv = nn.Conv2d(stacked_channels, self.dim_in, 1)

    # Two logits per anchor for softmax, one otherwise.
    logits_per_anchor = 2 if cfg.RPN.CLS_ACTIVATION == 'softmax' else 1
    self.FPN_RPN_cls_score = nn.Conv2d(
        self.dim_out, anchor_count * logits_per_anchor, 1, 1, 0)
    self.FPN_RPN_bbox_pred = nn.Conv2d(self.dim_out, 4 * anchor_count, 1, 1, 0)

    proposal_ops = nn.ModuleList()
    self.GenerateProposals_modules = proposal_ops

    top_level = cfg.FPN.RPN_MAX_LEVEL      # coarsest level of pyramid
    bottom_level = cfg.FPN.RPN_MIN_LEVEL   # finest level of pyramid
    for level in range(bottom_level, top_level + 1):
        # spatial_scales is indexed in reversed order relative to `level`.
        scale = self.spatial_scales[top_level - level]
        steps_above_finest = level - bottom_level
        anchors = generate_anchors(
            stride=2. ** level,
            sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2. ** steps_above_finest,),
            aspect_ratios=ratios,
        )
        proposal_ops.append(GenerateProposalsOp(anchors, scale))

    self.CollectAndDistributeFpnRpnProposals = CollectAndDistributeFpnRpnProposalsOp()

    self._init_weights()
def __init__(self, dim_in, spatial_scales):
    """Create the RPN head modules shared by all FPN levels.

    The head is one 3x3 conv that keeps the channel count, followed by two
    separate 1x1 convs: one for objectness scores and one for box deltas.

    Args:
        dim_in: Channel count of the incoming feature maps (also ``dim_out``).
        spatial_scales: Sequence of per-level scales, read as
            ``spatial_scales[k_max - lvl]``, i.e. in reversed level order.
    """
    super().__init__()
    self.dim_in = dim_in
    self.spatial_scales = spatial_scales
    self.dim_out = self.dim_in
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)

    # Conv ops shared by all FPN levels.
    self.FPN_RPN_conv = nn.Conv2d(dim_in, self.dim_out, 3, 1, 1)

    # One logit per anchor by default; softmax needs a pair per anchor.
    dim_score = num_anchors
    if cfg.RPN.CLS_ACTIVATION == 'softmax':
        dim_score *= 2
    self.FPN_RPN_cls_score = nn.Conv2d(self.dim_out, dim_score, 1, 1, 0)
    self.FPN_RPN_bbox_pred = nn.Conv2d(self.dim_out, 4 * num_anchors, 1, 1, 0)

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid

    def build_proposal_op(level):
        # The scale index counts down as the level counts up.
        scale = self.spatial_scales[k_max - level]
        # FIXME (original author): anchor generation is the part to work on.
        anchors = generate_anchors(
            stride=2. ** level,
            sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2. ** (level - k_min),),
            aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS,
        )
        return GenerateProposalsOp(anchors, scale)

    # One proposal generator per level, ordered from finest to coarsest.
    self.GenerateProposals_modules = nn.ModuleList(
        [build_proposal_op(level) for level in range(k_min, k_max + 1)])

    self.CollectAndDistributeFpnRpnProposals = CollectAndDistributeFpnRpnProposalsOp()

    self._init_weights()
def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
    """Run backbone, RPN and box head, then (optionally) two cascade stages.

    Args:
        data: Input batch; passed straight to ``self.Conv_Body``.
        im_info: Per-image metadata. Below, ``im_info[e][2]`` is used to
            rescale boxes and ``im_info[e][0:2]`` as clipping bounds
            (presumably height, width, scale -- TODO confirm).
        roidb: Serialized roidb entries. They are deserialized with
            ``blob_utils.deserialize`` only when ``self.training`` is set.
        **rpn_kwargs: Extra keyword arguments for
            ``rpn_heads.generic_rpn_losses``. The dict is extended with the
            ``rpn_cls_logits*`` / ``rpn_bbox_pred*`` entries of the RPN output
            before each loss call.

    Returns:
        dict: ``return_dict`` with
            * ``'blob_conv'`` when not training;
            * ``'losses'`` and ``'metrics'`` (values unsqueezed to 1-D) when
              training;
            * ``'rois'``, ``'cls_score'``, ``'bbox_pred'`` when
              ``cfg.FAST_RCNN.USE_CASCADE`` is off;
            * ``'rois_k'``, ``'cls_score_k'``, ``'bbox_pred_k'`` for
              k = 0, 1, 2 when it is on (scores/preds are detached).
    """
    im_data = data
    if self.training:
        # Each roidb item is serialized; keep element 0 of the deserialized value.
        roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

    # NOTE(review): device_id is not read anywhere else in this function.
    device_id = im_data.get_device()

    return_dict = {}  # A dict to collect return variables

    blob_conv = self.Conv_Body(im_data)

    rpn_ret = self.RPN(blob_conv, im_info, roidb)

    # if self.training:
    #     # can be used to infer fg/bg ratio
    #     return_dict['rois_label'] = rpn_ret['labels_int32']

    if cfg.FPN.FPN_ON:
        # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
        # extra blobs that are used for RPN proposals, but not for RoI heads.
        blob_conv = blob_conv[-self.num_roi_levels:]

    if not self.training:
        return_dict['blob_conv'] = blob_conv

    if not cfg.MODEL.RPN_ONLY:
        # res5_feat is only bound when SHARE_RES5 is on and we are training.
        if cfg.MODEL.SHARE_RES5 and self.training:
            box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)
        else:
            box_feat = self.Box_Head(blob_conv, rpn_ret)
        cls_score, bbox_pred = self.Box_Outs(box_feat)
    else:
        # TODO: complete the returns for RPN only situation
        # NOTE(review): cls_score / bbox_pred stay unbound on this path, yet
        # they are read unconditionally further down.
        pass

    if self.training:
        return_dict['losses'] = {}
        return_dict['metrics'] = {}
        # rpn loss
        # Forward the RPN logits / box predictions to the loss function.
        rpn_kwargs.update(
            dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                 if (k.startswith('rpn_cls_logits') or k.startswith('rpn_bbox_pred'))))
        loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
            **rpn_kwargs)
        if cfg.FPN.FPN_ON:
            # With FPN the losses are indexed per level; store one entry per level.
            for k, lvl in enumerate(
                    range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1)):
                return_dict['losses']['loss_rpn_cls_fpn%d' % lvl] = loss_rpn_cls[k]
                return_dict['losses']['loss_rpn_bbox_fpn%d' % lvl] = loss_rpn_bbox[k]
        else:
            return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
            return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

        # bbox loss
        loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
            cls_score, bbox_pred, rpn_ret['labels_int32'],
            rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
            rpn_ret['bbox_outside_weights'], stage=0)
        return_dict['losses']['loss_cls'] = loss_cls
        return_dict['losses']['loss_bbox'] = loss_bbox
        return_dict['metrics']['accuracy_cls'] = accuracy_cls

        if cfg.MODEL.MASK_ON:
            if getattr(self.Mask_Head, 'SHARE_RES5', False):
                mask_feat = self.Mask_Head(
                    res5_feat, rpn_ret,
                    roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
            else:
                mask_feat = self.Mask_Head(blob_conv, rpn_ret)
            mask_pred = self.Mask_Outs(mask_feat)
            # return_dict['mask_pred'] = mask_pred
            # mask loss
            loss_mask = mask_rcnn_heads.mask_rcnn_losses(
                mask_pred, rpn_ret['masks_int32'])
            return_dict['losses']['loss_mask'] = loss_mask

        if cfg.MODEL.KEYPOINTS_ON:
            if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                # No corresponding keypoint head implemented yet (Neither in Detectron)
                # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                # NOTE(review): the key read here is 'roi_has_keypoint_int32'
                # (no "s"), unlike the keyword name -- confirm which is intended.
                kps_feat = self.Keypoint_Head(
                    res5_feat, rpn_ret,
                    roi_has_keypoints_int32=rpn_ret[
                        'roi_has_keypoint_int32'])
            else:
                kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
            kps_pred = self.Keypoint_Outs(kps_feat)
            # return_dict['keypoints_pred'] = kps_pred
            # keypoints loss
            if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'])
            else:
                # Here an explicit normalizer from rpn_ret is passed as well.
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'],
                    rpn_ret['keypoint_loss_normalizer'])
            return_dict['losses']['loss_kps'] = loss_keypoints

        # pytorch0.4 bug on gathering scalar(0-dim) tensors
        for k, v in return_dict['losses'].items():
            return_dict['losses'][k] = v.unsqueeze(0)
        for k, v in return_dict['metrics'].items():
            return_dict['metrics'][k] = v.unsqueeze(0)

    if not cfg.FAST_RCNN.USE_CASCADE:
        return_dict['rois'] = rpn_ret['rois']
        return_dict['cls_score'] = cls_score
        return_dict['bbox_pred'] = bbox_pred
    else:
        # Stage-0 outputs get a "_0" suffix; score/pred tensors are detached.
        return_dict['rois' + '_{}'.format(0)] = rpn_ret['rois']
        return_dict['cls_score' + '_{}'.format(0)] = cls_score.detach()
        return_dict['bbox_pred' + '_{}'.format(0)] = bbox_pred.detach()

    if cfg.FAST_RCNN.USE_CASCADE:
        for i in range(2):
            # i takes the values 1 and 2: the two refinement stages.
            i += 1
            pre_stage_name = '_{}'.format(i - 1)
            cls_score_cpu = cls_score.data.cpu().numpy()
            import utils.boxes as box_utils
            # Flatten to 2-D (last dimension kept) and move to a numpy array.
            bbox_pred_cpu = bbox_pred.reshape(
                [-1, bbox_pred.shape[-1]]).data.cpu().numpy().squeeze()
            # Work on a copy: the rois are rescaled in place below.
            rois = deepcopy(return_dict['rois' + pre_stage_name])

            assert cfg.MODEL.CLS_AGNOSTIC_BBOX_REG is True
            # NOTE(review): given the assert above, this branch only runs when
            # assertions are disabled (python -O).
            if not cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Best non-background class per roi (column 0 is skipped).
                cls_loc = np.argmax(cls_score_cpu[:, 1:], axis=1) + 1
                cls_loc = np.reshape(cls_loc, (cls_loc.shape[0], 1))
                # Based on scores, we can select transformed rois
                scores = np.zeros((cls_score_cpu.shape[0], 1))
                for k in range(len(cls_loc)):
                    scores[k] = cls_score_cpu[k, cls_loc[k]]

            # Column 0 of rois is treated as the batch (image) index.
            batch_inds = rois[:, 0]
            uni_inds = np.unique(batch_inds)

            # WE suppose the WIDTH of image is EQUAL to its HEIGHT

            # We also provide an example to show how to perform the operation batch-wise
            # Scale forward
            # NOTE(review): im_info is indexed by position `e`, not by the
            # batch id uni_inds[e]; the slice id_min..id_max also assumes each
            # image's rois are contiguous -- confirm.
            batch_se = []
            for e in range(len(uni_inds)):
                id_min = min(np.where(batch_inds == uni_inds[e])[0])
                id_max = max(np.where(batch_inds == uni_inds[e])[0])
                rois[id_min:id_max + 1, 1:5] /= im_info[e][2]
                batch_se.append([id_min, id_max])

            # Decode the previous stage's deltas using this stage's weights.
            pred_boxes = box_utils.bbox_transform(
                rois[:, 1:5], bbox_pred_cpu,
                cfg.CASCADE_RCNN.BBOX_REG_WEIGHTS[i])
            # Scale back
            for e in range(len(uni_inds)):
                id_min = batch_se[e][0]
                id_max = batch_se[e][1]
                pred_boxes[id_min:id_max + 1] *= im_info[e][2]
                pred_boxes[id_min:id_max + 1] = box_utils.clip_tiled_boxes(
                    pred_boxes[id_min:id_max + 1], im_info[e][0:2])

            cfg_key = 'TRAIN' if self.training else 'TEST'
            min_size = cfg[cfg_key].RPN_MIN_SIZE

            if not cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
                # Cannot use for loop here which may cause "illegal memory access"
                # Thanks to Chen-Wei Xie !
                # (Unreachable while the assert above is active.)
                rows = pred_boxes.shape[0]
                b3 = cls_loc * 4 + np.array([0, 1, 2, 3])
                b4 = np.array(range(rows))
                c = pred_boxes[np.repeat(b4, 4), b3.flatten()]
                proposals = np.reshape(c, (-1, 4))
            else:
                # Columns 4..7 are taken as the box (presumably the foreground
                # slot of class-agnostic regression -- TODO confirm).
                proposals = pred_boxes[:, 4:8]

            # NOTE(review): only im_info[0] is passed here, for all images.
            keep = _filter_boxes(proposals, min_size, im_info[0])

            proposals = proposals[keep, :]
            batch_inds = batch_inds[keep]

            batch_inds = np.reshape(batch_inds, [len(batch_inds), 1])

            # Re-attach the batch index as column 0.
            proposals = np.concatenate((batch_inds, proposals), axis=1)

            from modeling.collect_and_distribute_fpn_rpn_proposals import CollectAndDistributeFpnRpnProposalsOp
            # NOTE(review): a fresh op is created and assigned on self for
            # every stage of every forward call.
            self.CollectAndDistributeFpnRpnProposals = CollectAndDistributeFpnRpnProposalsOp(
            )
            self.CollectAndDistributeFpnRpnProposals.training = self.training
            # proposals.astype('float32')
            blobs_out = self.CollectAndDistributeFpnRpnProposals(proposals, roidb,
                                                                 im_info, stage=i)
            # Update rpn_ret
            # Carry over only the entries whose key contains 'rpn'; the rest
            # comes from blobs_out.
            new_rpn_ret = {}
            for key, value in rpn_ret.items():
                if 'rpn' in key:
                    new_rpn_ret[key] = value

            new_rpn_ret.update(blobs_out)

            if not self.training:
                return_dict['blob_conv'] = blob_conv

            if not cfg.MODEL.RPN_ONLY:
                # Stage 1 uses Box_Head_2 / Box_Outs_2, stage 2 uses the *_3 pair.
                if i == 1:
                    if cfg.MODEL.SHARE_RES5 and self.training:
                        box_feat, res5_feat = self.Box_Head_2(
                            blob_conv, new_rpn_ret)
                    else:
                        box_feat = self.Box_Head_2(blob_conv, new_rpn_ret)

                    cls_score, bbox_pred = self.Box_Outs_2(box_feat)
                elif i == 2:
                    if cfg.MODEL.SHARE_RES5 and self.training:
                        box_feat, res5_feat = self.Box_Head_3(
                            blob_conv, new_rpn_ret)
                    else:
                        box_feat = self.Box_Head_3(blob_conv, new_rpn_ret)

                    cls_score, bbox_pred = self.Box_Outs_3(box_feat)

            if self.training:
                # rpn loss
                # NOTE(review): the RPN losses are recomputed and added (+=)
                # again on every cascade stage -- confirm this is intended.
                rpn_kwargs.update(
                    dict((k, new_rpn_ret[k]) for k in new_rpn_ret.keys()
                         if (k.startswith('rpn_cls_logits') or k.startswith('rpn_bbox_pred'))))
                loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(
                    **rpn_kwargs)
                if cfg.FPN.FPN_ON:
                    for k, lvl in enumerate(
                            range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1)):
                        return_dict['losses']['loss_rpn_cls_fpn%d' % lvl] += loss_rpn_cls[k]
                        return_dict['losses']['loss_rpn_bbox_fpn%d' % lvl] += loss_rpn_bbox[k]
                else:
                    return_dict['losses']['loss_rpn_cls'] += loss_rpn_cls
                    return_dict['losses']['loss_rpn_bbox'] += loss_rpn_bbox

                # bbox loss
                loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
                    cls_score, bbox_pred, new_rpn_ret['labels_int32'],
                    new_rpn_ret['bbox_targets'],
                    new_rpn_ret['bbox_inside_weights'],
                    new_rpn_ret['bbox_outside_weights'], stage=i)
                # Per-stage values are summed into the stage-0 entries (the
                # accuracy metric is summed too, not averaged).
                return_dict['losses']['loss_cls'] += loss_cls
                return_dict['losses']['loss_bbox'] += loss_bbox
                return_dict['metrics']['accuracy_cls'] += accuracy_cls

            return_dict['rois' + '_{}'.format(i)] = deepcopy(
                new_rpn_ret['rois'])
            return_dict['cls_score' + '_{}'.format(i)] = cls_score.detach()
            return_dict['bbox_pred' + '_{}'.format(i)] = bbox_pred.detach()

            # The next stage starts from this stage's rois and targets.
            rpn_ret = new_rpn_ret.copy()

    return return_dict