def get_prediction(self, conv_feat, im_info, proposal):
    """Build the test-time class scores and decoded boxes of the bbox head.

    Args:
        conv_feat: feature symbol forwarded to ``self.get_output``.
        im_info: image info symbol handed to ``X.decode_bbox``.
        proposal: RoI symbol the predicted deltas are decoded against.

    Returns:
        ``(cls_score, bbox_xyxy)``; ``cls_score`` is the softmax of the class
        logits reshaped to ``(batch_image, -1, num_class)``.
    """
    cfg = self.p
    target_cfg = cfg.regress_target
    n_image = cfg.batch_image
    n_class = cfg.num_class
    agnostic = target_cfg.class_agnostic

    # class-agnostic regression uses 4 * 2 delta channels instead of 4 * num_class
    if agnostic:
        n_reg_class = 2
    else:
        n_reg_class = n_class

    logits, deltas = self.get_output(conv_feat)

    deltas = X.reshape(
        deltas,
        shape=(n_image, -1, 4 * n_reg_class),
        name='bbox_delta_reshape',
    )
    boxes = X.decode_bbox(
        rois=proposal,
        bbox_pred=deltas,
        im_info=im_info,
        name='decode_bbox',
        bbox_mean=target_cfg.mean,
        bbox_std=target_cfg.std,
        class_agnostic=agnostic,
    )

    scores = X.softmax(logits, axis=-1, name='bbox_cls_score')
    scores = X.reshape(
        scores,
        shape=(n_image, -1, n_class),
        name='bbox_cls_score_reshape',
    )
    return scores, boxes
def get_sampled_proposal_with_filter(self, conv_fpn_feat, gt_bbox, gt_poly, im_info, valid_ranges):
    """Sample scale-filtered proposals and build bbox and mask targets.

    Proposals come from ``self.get_all_proposal_with_filter`` and are fed,
    together with the ground truth, to ``mx.sym.ProposalMaskTarget``.

    Args:
        conv_fpn_feat: feature symbol(s) forwarded to the proposal generator.
        gt_bbox: ground-truth box symbol.
        gt_poly: ground-truth polygon symbol.
        im_info: image info symbol.
        valid_ranges: valid scale range symbol used for filtering.

    Returns:
        ``(bbox, label, bbox_target, bbox_weight, mask_proposal, mask_target)``.
        ``mask_proposal`` is the slice of ``bbox`` along axis 1 covering the
        first ``int(image_roi * fg_fraction)`` entries.
    """
    p = self.p
    sample_cfg = p.subsample_proposal
    target_cfg = p.bbox_target

    image_roi = sample_cfg.image_roi
    fg_fraction = sample_cfg.fg_fraction

    # the proposal score is not consumed by the target op
    proposal, _ = self.get_all_proposal_with_filter(
        conv_fpn_feat, im_info, valid_ranges)

    # output_iou=True adds the matched-IoU output, which is discarded here
    bbox, label, bbox_target, bbox_weight, _, mask_target = mx.sym.ProposalMaskTarget(
        proposal,
        gt_bbox,
        gt_poly,
        valid_ranges,
        mask_size=self.pMask.resolution,
        num_classes=target_cfg.num_reg_class,
        class_agnostic=target_cfg.class_agnostic,
        batch_images=p.batch_image,
        proposal_without_gt=sample_cfg.proposal_wo_gt,
        image_rois=image_roi,
        fg_fraction=fg_fraction,
        fg_thresh=sample_cfg.fg_thr,
        bg_thresh_hi=sample_cfg.bg_thr_hi,
        bg_thresh_lo=sample_cfg.bg_thr_lo,
        bbox_weight=target_cfg.weight,
        bbox_mean=target_cfg.mean,
        bbox_std=target_cfg.std,
        output_iou=True,
        filter_scales=True,
        name="subsample_proposal")

    # merge the two leading axes of every target
    label = X.reshape(label, (-3, -2))
    bbox_target = X.reshape(bbox_target, (-3, -2))
    bbox_weight = X.reshape(bbox_weight, (-3, -2))
    mask_target = X.reshape(mask_target, (-3, -2))

    num_fg_rois_per_img = int(image_roi * fg_fraction)
    mask_proposal = mx.sym.slice_axis(
        bbox, axis=1, begin=0, end=num_fg_rois_per_img)

    return bbox, label, bbox_target, bbox_weight, mask_proposal, mask_target
def get_train_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch, scaleaware):
    """Assemble the training symbol (RPN loss + bbox loss) for the branched model.

    ``gt_bbox`` and ``im_info`` are stacked ``num_branch`` times with
    ``TridentResNetV2Builder.stack_branch_symbols``. When ``scaleaware`` is
    truthy a ``valid_ranges`` input is created and the filtering proposal
    sampler is used; otherwise the plain sampler is used.

    Returns:
        The ``X.group`` of ``rpn_loss + bbox_loss``.
    """
    fold = (-3, -2)
    stack = TridentResNetV2Builder.stack_branch_symbols

    # input variables
    gt_bbox = X.var("gt_bbox")
    im_info = X.var("im_info")
    valid_ranges = X.var("valid_ranges") if scaleaware else None
    rpn_cls_label = X.var("rpn_cls_label")
    rpn_reg_target = X.var("rpn_reg_target")
    rpn_reg_weight = X.var("rpn_reg_weight")

    # replicate per-image inputs for every branch
    im_info = stack([im_info] * num_branch)
    gt_bbox = stack([gt_bbox] * num_branch)

    # merge the two leading axes of the remaining inputs
    if scaleaware:
        valid_ranges = X.reshape(valid_ranges, fold)
    rpn_cls_label = X.reshape(rpn_cls_label, fold)
    rpn_reg_target = X.reshape(rpn_reg_target, fold)
    rpn_reg_weight = X.reshape(rpn_reg_weight, fold)

    rpn_feat = backbone.get_rpn_feature()
    rcnn_feat = backbone.get_rcnn_feature()
    rpn_feat = neck.get_rpn_feature(rpn_feat)
    rcnn_feat = neck.get_rcnn_feature(rcnn_feat)

    rpn_loss = rpn_head.get_loss(
        rpn_feat, rpn_cls_label, rpn_reg_target, rpn_reg_weight)

    if scaleaware:
        sampled = rpn_head.get_sampled_proposal_with_filter(
            rpn_feat, gt_bbox, im_info, valid_ranges)
    else:
        sampled = rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, im_info)
    proposal, bbox_cls, bbox_target, bbox_weight = sampled

    roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal)
    bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight)

    return X.group(rpn_loss + bbox_loss)
def get_loss(self, conv_feat, mask_target):
    """Build the sigmoid cross-entropy mask loss.

    Both the mask logits from ``self.get_output`` and ``mask_target`` are
    reshaped to ``(1, -1)`` before being passed to
    ``mx.sym.contrib.SigmoidCrossEntropy``.

    Args:
        conv_feat: feature symbol forwarded to ``self.get_output``.
        mask_target: mask target symbol.

    Returns:
        A one-element tuple ``(mask_loss,)``.
    """
    mask_fcn_logit = self.get_output(conv_feat)

    # gradient is scaled by 128 when the mask head is configured for fp16
    scale_loss_shift = 128.0 if self.pMask.fp16 else 1.0

    mask_fcn_logit = X.reshape(
        mask_fcn_logit,
        shape=(1, -1),
        name="mask_fcn_logit_reshape"
    )
    mask_target = X.reshape(
        mask_target,
        shape=(1, -1),
        name="mask_target_reshape"
    )
    mask_loss = mx.sym.contrib.SigmoidCrossEntropy(
        mask_fcn_logit,
        mask_target,
        grad_scale=1.0 * scale_loss_shift,
        name="mask_loss"
    )
    return (mask_loss,)
def get_test_symbol(backbone, neck, rpn_head, roi_extractor, mask_roi_extractor, bbox_head, mask_head, bbox_post_processor, num_branch):
    """Assemble the test symbol with box post-processing and mask prediction.

    RPN outputs come from ``TridentFasterRcnn.get_rpn_test_symbol``; boxes
    are predicted, post-processed, and the post-processed boxes are used as
    RoIs for the mask head.

    Returns:
        The ``X.group`` of ``[rec_id, im_id, im_info, post_cls_score,
        post_bbox_xyxy, post_cls, mask]``.
    """
    rpn_outputs = TridentFasterRcnn.get_rpn_test_symbol(
        backbone, neck, rpn_head, num_branch)
    rec_id, im_id, im_info, proposal, proposal_score = rpn_outputs

    im_info_branches = TridentResNetV2Builder.stack_branch_symbols(
        [im_info] * num_branch)

    rcnn_feat = neck.get_rcnn_feature(backbone.get_rcnn_feature())

    # box branch
    roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal)
    cls_score, bbox_xyxy = bbox_head.get_prediction(
        roi_feat, im_info_branches, proposal)
    processed = bbox_post_processor.get_post_processing(cls_score, bbox_xyxy)
    post_cls_score, post_bbox_xyxy, post_cls = processed

    # mask branch runs on the post-processed boxes
    mask_roi_feat = mask_roi_extractor.get_roi_feature(rcnn_feat, post_bbox_xyxy)
    mask = mask_head.get_prediction(mask_roi_feat)

    # fold batch size into roi size for trident only
    post_cls_score = X.reshape(
        post_cls_score, (-3, -2), name="post_cls_score_fold")
    post_bbox_xyxy = X.reshape(
        post_bbox_xyxy, (-3, -2), name="post_bbox_xyxy_fold")
    post_cls = X.reshape(post_cls, (-3, -2), name="post_cls_fold")

    outputs = [
        rec_id,
        im_id,
        im_info,
        post_cls_score,
        post_bbox_xyxy,
        post_cls,
        mask,
    ]
    return X.group(outputs)
def get_loss(self, conv_fpn_feat, cls_label, bbox_target, bbox_weight):
    """Build the multi-level RPN classification and regression losses.

    Per-stride logits and deltas from ``self.get_output`` are reshaped,
    concatenated along axis 2 and compared against the given targets.

    Returns:
        ``(cls_loss, reg_loss)``.
    """
    cfg = self.p
    anchor_cfg = cfg.anchor_generate
    n_image = cfg.batch_image
    n_anchor = anchor_cfg.image_anchor

    logit_by_stride, delta_by_stride = self.get_output(conv_fpn_feat)

    # gradient is scaled by 128 when configured for fp16
    loss_scale = 128.0 if cfg.fp16 else 1.0

    level_logits = []
    level_deltas = []
    for stride in anchor_cfg.stride:
        level_logits.append(X.reshape(
            data=logit_by_stride[stride],
            shape=(0, 2, -1),
            name="rpn_cls_score_reshape_stride%s" % stride,
        ))
        level_deltas.append(X.reshape(
            data=delta_by_stride[stride],
            shape=(0, 0, -1),
            name="rpn_bbox_pred_reshape_stride%s" % stride,
        ))

    # concat output of each level
    all_deltas = X.concat(level_deltas, axis=2, name="rpn_bbox_pred_concat")
    all_logits = X.concat(level_logits, axis=2, name="rpn_cls_score_concat")

    # classification loss
    cls_loss = X.softmax_output(
        data=all_logits,
        label=cls_label,
        multi_output=True,
        normalization='valid',
        use_ignore=True,
        ignore_label=-1,
        grad_scale=1.0 * loss_scale,
        name="rpn_cls_loss",
    )

    # regression loss
    l1 = X.smooth_l1(
        (all_deltas - bbox_target),
        scalar=3.0,
        name='rpn_reg_l1',
    )
    weighted_l1 = bbox_weight * l1
    reg_loss = X.loss(
        weighted_l1,
        grad_scale=1.0 / (n_image * n_anchor) * loss_scale,
        name='rpn_reg_loss',
    )

    return cls_loss, reg_loss
def get_all_proposal_with_filter(self, conv_feat, im_info, valid_ranges):
    """Generate RPN proposals with scale filtering, cached on the instance.

    The result of ``mx.sym.contrib.Proposal_v2`` (called with
    ``output_score=True`` and ``filter_scales=True``) is stored in
    ``self._proposal`` and returned directly on later calls.
    """
    cached = self._proposal
    if cached is not None:
        return cached

    anchor_cfg = self.p.anchor_generate
    proposal_cfg = self.p.proposal

    logits, deltas = self.get_output(conv_feat)

    # TODO: remove this reshape hell
    # (N,C,H,W) -> (N,2,C/2,H,W), softmax over the size-2 axis, then fold back
    split_logits = X.reshape(
        logits,
        shape=(0, -4, 2, -1, 0, 0),
        name="rpn_cls_logit_reshape_",
    )
    scores = X.softmax(split_logits, axis=1, name='rpn_cls_score')
    folded_scores = X.reshape(
        scores,
        shape=(0, -3, 0, 0),
        name='rpn_cls_score_reshape',
    )

    result = mx.sym.contrib.Proposal_v2(
        cls_prob=folded_scores,
        bbox_pred=deltas,
        im_info=im_info,
        valid_ranges=valid_ranges,
        name='proposal',
        feature_stride=anchor_cfg.stride,
        scales=tuple(anchor_cfg.scale),
        ratios=tuple(anchor_cfg.ratio),
        rpn_pre_nms_top_n=proposal_cfg.pre_nms_top_n,
        rpn_post_nms_top_n=proposal_cfg.post_nms_top_n,
        threshold=proposal_cfg.nms_thr,
        rpn_min_size=proposal_cfg.min_bbox_side,
        iou_loss=False,
        filter_scales=True,
        output_score=True,
    )

    self._proposal = result
    return result
def get_prediction(self, rois, roi_feat, fpn_conv_feats, im_info, play=False):
    """Build test-time scores and boxes from the ``tsd_*`` outputs.

    Only class-specific regression and ``batch_image == 1`` are accepted
    (both enforced with ``assert``).

    Args:
        rois: [batch_image, image_roi, 4]
        roi_feat: [batch_image * image_roi, 256, roi_size, roi_size]
        fpn_conv_feats: dict of FPN features, each
            [batch_image, in_channels, fh, fw]
        im_info: image info symbol passed to ``X.decode_bbox``.
        play: when truthy, also return the input rois and the rois produced
            by ``self._get_delta_r_box``.

    Returns:
        cls_score: [batch_image, image_roi, num_class]
        bbox_xyxy: [batch_image, image_roi, num_class*4]
        plus ``rois, rois_r`` when ``play`` is truthy.
    """
    cfg = self.p
    target_cfg = cfg.regress_target
    assert not target_cfg.class_agnostic

    mean = target_cfg.mean
    std = target_cfg.std
    n_image = cfg.batch_image
    n_class = cfg.num_class
    assert n_image == 1

    # only the tsd_* outputs and delta_r are consumed at test time
    _, _, tsd_logits, tsd_deltas, _, delta_r = self.get_output(
        fpn_conv_feats, roi_feat, rois, is_train=False)

    rois_r = self._get_delta_r_box(delta_r, rois)

    shaped_deltas = X.reshape(tsd_deltas, (n_image, -1, 4 * n_class))
    bbox_xyxy = X.decode_bbox(
        rois=rois_r,
        bbox_pred=shaped_deltas,
        im_info=im_info,
        name='decode_bbox',
        bbox_mean=mean,
        bbox_std=std,
        class_agnostic=False,
    )

    probs = X.softmax(tsd_logits, axis=-1, name='bbox_cls_score')
    cls_score = X.reshape(
        probs,
        shape=(n_image, -1, n_class),
        name='bbox_cls_score_reshape',
    )

    if play:
        return cls_score, bbox_xyxy, rois, rois_r
    return cls_score, bbox_xyxy
def get_train_symbol(cls, backbone, neck, rpn_head):
    """Assemble the RPN-only training symbol.

    The three RPN label inputs are created as variables, their two leading
    axes are merged, and the loss from ``rpn_head.get_loss`` is grouped.
    """
    fold = (-3, -2)

    cls_label = X.var("rpn_cls_label")
    reg_target = X.var("rpn_reg_target")
    reg_weight = X.var("rpn_reg_weight")

    cls_label = X.reshape(cls_label, fold)
    reg_target = X.reshape(reg_target, fold)
    reg_weight = X.reshape(reg_weight, fold)

    feat = neck.get_rpn_feature(backbone.get_rpn_feature())
    losses = rpn_head.get_loss(feat, cls_label, reg_target, reg_weight)

    return X.group(losses)
def get_loss(self, conv_feat, cls_label, bbox_target, bbox_weight):
    """Build the bbox head classification and regression losses.

    Returns:
        ``(cls_loss, reg_loss, cls_label)`` where the returned label is
        reshaped to ``(batch_image, -1)`` and wrapped in ``X.block_grad``.
    """
    cfg = self.p
    n_image = cfg.batch_image
    n_roi_in_batch = cfg.image_roi * n_image

    logits, deltas = self.get_output(conv_feat)

    # gradient is scaled by 128 when configured for fp16
    loss_scale = 128.0 if cfg.fp16 else 1.0

    # classification loss
    cls_loss = X.softmax_output(
        data=logits,
        label=cls_label,
        normalization='batch',
        grad_scale=1.0 * loss_scale,
        name='bbox_cls_loss',
    )

    # bounding box regression
    l1 = X.smooth_l1(deltas - bbox_target, scalar=1.0, name='bbox_reg_l1')
    weighted_l1 = bbox_weight * l1
    reg_loss = X.loss(
        weighted_l1,
        grad_scale=1.0 / n_roi_in_batch * loss_scale,
        name='bbox_reg_loss',
    )

    # append label
    label_out = X.reshape(
        cls_label, shape=(n_image, -1), name='bbox_label_reshape')
    label_out = X.block_grad(label_out, name='bbox_label_blockgrad')

    # output
    return cls_loss, reg_loss, label_out
def get_all_proposal(self, rois, bbox_pred, im_info):
    """Decode this stage's box predictions into proposals, cached on the instance.

    Returns:
        A tuple ``(proposal, None)``; the ``None`` stands in for a proposal
        score. The tuple is stored in ``self._proposal`` and returned
        directly on later calls.
    """
    cached = self._proposal
    if cached is not None:
        return cached

    cfg = self.p
    target_cfg = cfg.regress_target
    stage = self.stage
    agnostic = target_cfg.class_agnostic

    # class-agnostic regression uses 4 * 2 delta channels instead of 4 * num_class
    if agnostic:
        n_reg_class = 2
    else:
        n_reg_class = cfg.num_class

    deltas = X.reshape(
        bbox_pred,
        shape=(cfg.batch_image, -1, 4 * n_reg_class),
        name='bbox_delta_reshape_' + stage,
    )
    decoded = X.decode_bbox(
        rois=rois,
        bbox_pred=deltas,
        im_info=im_info,
        name='decode_bbox_' + stage,
        bbox_mean=target_cfg.mean,
        bbox_std=target_cfg.std,
        class_agnostic=agnostic,
    )

    # append None for dummy proposal score
    result = (decoded, None)
    self._proposal = result
    return result
def get_loss(self, conv_feat, cls_label, bbox_target, bbox_weight):
    """Build the single-level RPN classification and regression losses.

    Returns:
        ``(cls_loss, reg_loss)``.
    """
    cfg = self.p
    n_image = cfg.batch_image
    n_anchor = cfg.anchor_generate.image_anchor

    logits, deltas = self.get_output(conv_feat)

    # gradient is scaled by 128 when configured for fp16
    loss_scale = 128.0 if cfg.fp16 else 1.0

    # classification loss
    # (N,C,H,W) -> (N,2,C/2,H,W)
    split_logits = X.reshape(
        logits,
        shape=(0, -4, 2, -1, 0, 0),
        name="rpn_cls_logit_reshape",
    )
    cls_loss = X.softmax_output(
        data=split_logits,
        label=cls_label,
        multi_output=True,
        normalization='valid',
        use_ignore=True,
        ignore_label=-1,
        grad_scale=1.0 * loss_scale,
        name="rpn_cls_loss",
    )

    # regression loss
    l1 = X.smooth_l1((deltas - bbox_target), scalar=3.0, name='rpn_reg_l1')
    weighted_l1 = bbox_weight * l1
    reg_loss = X.loss(
        weighted_l1,
        grad_scale=1.0 / (n_image * n_anchor) * loss_scale,
        name='rpn_reg_loss',
    )

    return cls_loss, reg_loss
def get_test_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch):
    """Assemble the box-only test symbol for the branched model.

    Returns:
        The ``X.group`` of ``[rec_id, im_id, im_info, cls_score, bbox_xyxy]``
        with the two leading axes of the predictions merged.
    """
    rpn_outputs = TridentFasterRcnn.get_rpn_test_symbol(
        backbone, neck, rpn_head, num_branch)
    rec_id, im_id, im_info, proposal, proposal_score = rpn_outputs

    im_info_branches = TridentResNetV2Builder.stack_branch_symbols(
        [im_info] * num_branch)

    rcnn_feat = neck.get_rcnn_feature(backbone.get_rcnn_feature())
    roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal)

    cls_score, bbox_xyxy = bbox_head.get_prediction(
        roi_feat, im_info_branches, proposal)

    # merge the two leading axes of both outputs
    cls_score = X.reshape(cls_score, (-3, -2))
    bbox_xyxy = X.reshape(bbox_xyxy, (-3, -2))

    outputs = [rec_id, im_id, im_info, cls_score, bbox_xyxy]
    return X.group(outputs)
def _get_bbox_head_logit(self, conv_feat):
    """Build the four-conv + one-fc head feature, cached on the instance.

    ``fix_bn`` uses ``X.convrelu`` layers; ``sync_bn`` and ``gn`` use
    ``X.convnormrelu`` with the configured normalizer.

    Raises:
        NotImplementedError: for any other normalizer name.
    """
    cached = self._head_feat
    if cached is not None:
        return cached

    normalizer = self.p.normalizer
    normalizer_name = normalizer.__name__

    if normalizer_name == "fix_bn":
        with_norm = False
    elif normalizer_name in ["sync_bn", "gn"]:
        with_norm = True
    else:
        raise NotImplementedError(
            "Unsupported normalizer: {}".format(normalizer_name))

    def layer(data, **kwargs):
        # one conv(+norm)+relu unit matching the configured normalizer
        if with_norm:
            return X.convnormrelu(normalizer, data, **kwargs)
        return X.convrelu(data, **kwargs)

    feat = conv_feat
    for conv_name in ("bbox_conv1", "bbox_conv2", "bbox_conv3", "bbox_conv4"):
        feat = layer(feat, filter=256, kernel=3, name=conv_name)

    flat = X.flatten(feat, name="bbox_feat_flatten")
    flat_4d = X.reshape(flat, (0, 0, 1, 1), name="bbox_feat_reshape")

    fc1 = layer(flat_4d, filter=1024, name="bbox_fc1")

    self._head_feat = fc1
    return self._head_feat
def get_sampled_proposal_with_filter(self, conv_feat, gt_bbox, im_info, valid_ranges):
    """Sample scale-filtered proposals and build bbox training targets.

    Proposals come from ``self.get_all_proposal_with_filter`` and are fed to
    ``mx.sym.ProposalTarget_v2`` with ``filter_scales=True``.

    Returns:
        ``(bbox, label, bbox_target, bbox_weight)``; the last three have
        their two leading axes merged.
    """
    cfg = self.p
    sample_cfg = cfg.subsample_proposal
    target_cfg = cfg.bbox_target

    target_kwargs = dict(
        num_classes=target_cfg.num_reg_class,
        class_agnostic=target_cfg.class_agnostic,
        batch_images=cfg.batch_image,
        proposal_without_gt=sample_cfg.proposal_wo_gt,
        image_rois=sample_cfg.image_roi,
        fg_fraction=sample_cfg.fg_fraction,
        fg_thresh=sample_cfg.fg_thr,
        bg_thresh_hi=sample_cfg.bg_thr_hi,
        bg_thresh_lo=sample_cfg.bg_thr_lo,
        bbox_weight=target_cfg.weight,
        bbox_mean=target_cfg.mean,
        bbox_std=target_cfg.std,
        filter_scales=True,
        name="subsample_proposal",
    )

    # the proposal score is not consumed by the target op
    proposal, _score = self.get_all_proposal_with_filter(
        conv_feat, im_info, valid_ranges)

    bbox, label, bbox_target, bbox_weight = mx.sym.ProposalTarget_v2(
        rois=proposal,
        gt_boxes=gt_bbox,
        valid_ranges=valid_ranges,
        **target_kwargs
    )

    fold = (-3, -2)
    label = X.reshape(label, fold)
    bbox_target = X.reshape(bbox_target, fold)
    bbox_weight = X.reshape(bbox_weight, fold)

    return bbox, label, bbox_target, bbox_weight
def get_sampled_proposal(self, conv_fpn_feat, gt_bbox, im_info):
    """Sample proposals and build bbox training targets.

    Proposals come from ``self.get_all_proposal`` and are fed, together with
    ``gt_bbox``, to ``X.proposal_target``.

    Args:
        conv_fpn_feat: feature symbol(s) forwarded to the proposal generator.
        gt_bbox: ground-truth box symbol.
        im_info: image info symbol.

    Returns:
        ``(bbox, label, bbox_target, bbox_weight)``; the last three have
        their two leading axes merged.
    """
    p = self.p
    sample_cfg = p.subsample_proposal
    target_cfg = p.bbox_target

    # the proposal score is not consumed by the target op
    (proposal, proposal_score) = self.get_all_proposal(conv_fpn_feat, im_info)

    (bbox, label, bbox_target, bbox_weight) = X.proposal_target(
        rois=proposal,
        gt_boxes=gt_bbox,
        num_classes=target_cfg.num_reg_class,
        class_agnostic=target_cfg.class_agnostic,
        batch_images=p.batch_image,
        proposal_without_gt=sample_cfg.proposal_wo_gt,
        image_rois=sample_cfg.image_roi,
        fg_fraction=sample_cfg.fg_fraction,
        fg_thresh=sample_cfg.fg_thr,
        bg_thresh_hi=sample_cfg.bg_thr_hi,
        bg_thresh_lo=sample_cfg.bg_thr_lo,
        bbox_weight=target_cfg.weight,
        bbox_mean=target_cfg.mean,
        bbox_std=target_cfg.std,
        name="subsample_proposal")

    label = X.reshape(label, (-3, -2))
    bbox_target = X.reshape(bbox_target, (-3, -2))
    bbox_weight = X.reshape(bbox_weight, (-3, -2))

    return bbox, label, bbox_target, bbox_weight
def get_sampled_proposal(self, rois, bbox_pred, gt_bbox, im_info):
    """Build bbox training targets for this stage without subsampling RoIs.

    Proposals are obtained from ``self.get_all_proposal(rois, bbox_pred,
    im_info)`` and passed to ``X.proposal_target`` with ``image_rois=-1``.

    Args:
        rois: RoI symbol of the previous stage.
        bbox_pred: box delta symbol of the previous stage.
        gt_bbox: ground-truth box symbol.
        im_info: image info symbol.

    Returns:
        ``(bbox, label, bbox_target, bbox_weight)``; the last three have
        their two leading axes merged.
    """
    p = self.p
    stage = self.stage
    sample_cfg = p.subsample_proposal
    target_cfg = p.bbox_target

    image_roi = -1  # do not subsample rois

    proposal = self.get_all_proposal(rois, bbox_pred, im_info)
    # The three-argument get_all_proposal in this file returns
    # (proposal, None). Only the box symbol is a valid ``rois`` input, so
    # unwrap the pair; a bare symbol is passed through unchanged.
    if isinstance(proposal, tuple):
        proposal = proposal[0]

    (bbox, label, bbox_target, bbox_weight) = X.proposal_target(
        rois=proposal,
        gt_boxes=gt_bbox,
        num_classes=target_cfg.num_reg_class,
        class_agnostic=target_cfg.class_agnostic,
        batch_images=p.batch_image,
        proposal_without_gt=sample_cfg.proposal_wo_gt,
        image_rois=image_roi,
        fg_fraction=sample_cfg.fg_fraction,
        fg_thresh=sample_cfg.fg_thr,
        bg_thresh_hi=sample_cfg.bg_thr_hi,
        bg_thresh_lo=sample_cfg.bg_thr_lo,
        bbox_weight=target_cfg.weight,
        bbox_mean=target_cfg.mean,
        bbox_std=target_cfg.std,
        name="subsample_proposal_" + stage)

    label = X.reshape(label, (-3, -2))
    bbox_target = X.reshape(bbox_target, (-3, -2))
    bbox_weight = X.reshape(bbox_weight, (-3, -2))

    return bbox, label, bbox_target, bbox_weight
def get_all_proposal(self, conv_feat, im_info):
    """Generate single-level RPN proposals, cached on the instance.

    The result of ``X.proposal`` is stored in ``self._proposal`` and returned
    directly on later calls.
    """
    cached = self._proposal
    if cached is not None:
        return cached

    anchor_cfg = self.p.anchor_generate
    proposal_cfg = self.p.proposal

    logits, deltas = self.get_output(conv_feat)

    # TODO: remove this reshape hell
    # (N,C,H,W) -> (N,2,C/2,H,W), softmax over the size-2 axis, then fold back
    split_logits = X.reshape(
        logits,
        shape=(0, -4, 2, -1, 0, 0),
        name="rpn_cls_logit_reshape_",
    )
    scores = X.softmax(split_logits, axis=1, name='rpn_cls_score')
    folded_scores = X.reshape(
        scores,
        shape=(0, -3, 0, 0),
        name='rpn_cls_score_reshape',
    )

    # TODO: ask all to add is_train filed in RPNParam
    result = X.proposal(
        cls_prob=folded_scores,
        bbox_pred=deltas,
        im_info=im_info,
        name='proposal',
        feature_stride=anchor_cfg.stride,
        scales=tuple(anchor_cfg.scale),
        ratios=tuple(anchor_cfg.ratio),
        rpn_pre_nms_top_n=proposal_cfg.pre_nms_top_n,
        rpn_post_nms_top_n=proposal_cfg.post_nms_top_n,
        threshold=proposal_cfg.nms_thr,
        rpn_min_size=proposal_cfg.min_bbox_side,
        iou_loss=False,
    )

    self._proposal = result
    return result
def get_loss(self, conv_feat, mask_target, mask_ind):
    """Build the mask loss on logits gathered with ``mask_ind``.

    The logits and ``mask_ind`` are split along axis 0 into ``batch_image``
    parts; for each part the logits are gathered with indices stacked from
    ``arange(pMask.num_fg_roi)`` and that part's ``mask_ind``.

    Returns:
        ``((mask_loss,), mask_pred_logits)`` where ``mask_pred_logits`` is the
        sigmoid activation of the gathered logits.
    """
    mask_cfg = self.pMask
    n_image = self.pBbox.batch_image

    logits = self.get_output(conv_feat)

    # gradient is scaled by 128 when the mask head is configured for fp16
    loss_scale = 128.0 if mask_cfg.fp16 else 1.0

    logits_per_image = mx.sym.split(logits, num_outputs=n_image, axis=0)
    inds_per_image = mx.sym.split(
        mask_ind, num_outputs=n_image, axis=0, squeeze_axis=True)

    gathered = []
    for image_logits, image_inds in zip(logits_per_image, inds_per_image):
        roi_index = mx.sym.arange(mask_cfg.num_fg_roi)
        gather_index = mx.sym.stack(roi_index, image_inds)
        gathered.append(
            mx.sym.gather_nd(image_logits, gather_index, axis=1))
    selected_logits = mx.sym.concat(*gathered, dim=0)

    # get mask prediction logits
    mask_pred_logits = mx.symbol.Activation(
        data=selected_logits,
        act_type='sigmoid',
        name='mask_pred_prob',
    )

    flat_logits = X.reshape(
        selected_logits,
        shape=(1, -1),
        name="mask_fcn_logit_reshape",
    )
    flat_target = X.reshape(
        mask_target,
        shape=(1, -1),
        name="mask_target_reshape",
    )
    mask_loss = mx.sym.contrib.SigmoidCrossEntropy(
        flat_logits,
        flat_target,
        grad_scale=1.0 * loss_scale,
        name="mask_loss",
    )

    return (mask_loss,), mask_pred_logits
def get_loss(self, conv_feat, gt_bboxes, im_infos, rpn_groups):
    """Build the RPN losses, optionally with a grouped softmax.

    The label, target and weight inputs are created here as ``X.var``
    symbols. ``gt_bboxes`` and ``im_infos`` are accepted but not read by this
    method. ``rpn_groups`` is used only when ``p.use_groupsoftmax`` is truthy.

    Returns:
        ``(cls_loss, reg_loss)``.
    """
    cfg = self.p
    n_class = cfg.num_class
    n_image = cfg.batch_image
    n_anchor = cfg.anchor_generate.image_anchor

    logits, deltas = self.get_output(conv_feat)

    # gradient is scaled by 128 when configured for fp16
    loss_scale = 128.0 if cfg.fp16 else 1.0

    cls_label = X.var("rpn_cls_label")
    bbox_target = X.var("rpn_reg_target")
    bbox_weight = X.var("rpn_reg_weight")

    # classification loss
    # (N,C,H,W) -> (N,num_class,C/num_class,H,W)
    split_logits = X.reshape(
        logits,
        shape=(0, -4, n_class, -1, 0, 0),
        name="rpn_cls_logit_reshape",
    )

    if not cfg.use_groupsoftmax:
        cls_loss = X.softmax_output(
            data=split_logits,
            label=cls_label,
            multi_output=True,
            normalization='valid',
            use_ignore=True,
            ignore_label=-1,
            grad_scale=1.0 * loss_scale,
            name="rpn_cls_loss",
        )
    else:
        cls_loss = mx.sym.contrib.GroupSoftmaxOutput(
            data=split_logits,
            label=cls_label,
            group=rpn_groups,
            multi_output=True,
            normalization='valid',
            use_ignore=True,
            ignore_label=-1,
            grad_scale=1.0 * loss_scale,
            name="rpn_cls_loss",
        )

    # regression loss
    l1 = X.smooth_l1((deltas - bbox_target), scalar=3.0, name='rpn_reg_l1')
    weighted_l1 = bbox_weight * l1
    reg_loss = X.loss(
        weighted_l1,
        grad_scale=1.0 / (n_image * n_anchor) * loss_scale,
        name='rpn_reg_loss',
    )

    return cls_loss, reg_loss
def _get_bbox_head_logit(self, conv_feat):
    """Build the two-fc head feature, cached on the instance.

    The input is flattened, reshaped to ``(0, 0, 1, 1)`` and passed through
    two 1024-filter ``X.convrelu`` layers.
    """
    cached = self._head_feat
    if cached is not None:
        return cached

    flat = X.flatten(conv_feat, name="bbox_feat_flatten")
    feat = X.reshape(flat, (0, 0, 1, 1), name="bbox_feat_reshape")

    for fc_name in ("bbox_fc1", "bbox_fc2"):
        feat = X.convrelu(feat, filter=1024, name=fc_name)

    self._head_feat = feat
    return self._head_feat
def get_sampled_proposal(self, conv_fpn_feat, gt_bbox, im_info):
    """Sample proposals and build bbox targets with the ``bbox_target`` custom op.

    Proposals come from ``self.get_all_proposal`` and are passed to
    ``mx.sym.Custom`` with ``op_type="bbox_target"``. Only the settings that
    op is actually given are read from the configuration.

    Args:
        conv_fpn_feat: feature symbol(s) forwarded to the proposal generator.
        gt_bbox: ground-truth box symbol.
        im_info: image info symbol.

    Returns:
        ``(bbox, label, bbox_target, bbox_weight)``; the last three have
        their two leading axes merged.
    """
    p = self.p
    sample_cfg = p.subsample_proposal
    target_cfg = p.bbox_target

    proposal = self.get_all_proposal(conv_fpn_feat, im_info)

    (bbox, label, bbox_target, bbox_weight) = mx.sym.Custom(
        proposal=proposal,
        gt_bbox=gt_bbox,
        num_class=target_cfg.num_reg_class,
        # the op takes the inverse of the "proposal without gt" flag
        add_gt_to_proposal=not sample_cfg.proposal_wo_gt,
        image_rois=sample_cfg.image_roi,
        fg_fraction=sample_cfg.fg_fraction,
        fg_thresh=sample_cfg.fg_thr,
        bg_thresh_hi=sample_cfg.bg_thr_hi,
        bg_thresh_lo=sample_cfg.bg_thr_lo,
        bbox_target_std=target_cfg.std,
        name="subsample_proposal",
        op_type="bbox_target")

    label = X.reshape(label, (-3, -2))
    bbox_target = X.reshape(bbox_target, (-3, -2))
    bbox_weight = X.reshape(bbox_weight, (-3, -2))

    return bbox, label, bbox_target, bbox_weight
def get_roi_feature(self, conv_fpn_feat, proposal):
    """Extract RoI features from every FPN level and sum them.

    The ``assign_layer_fpn`` custom op yields one RoI output per entry of
    ``p.stride``. Each level is RoI-aligned with its own RoIs, reshaped, and
    all levels are summed with ``X.add_n``.

    When ``p.fp16`` is set, each level is cast with ``X.to_fp32`` before RoI
    align and the summed result is cast back with ``X.to_fp16``. The cast
    result is kept local, so the caller's ``conv_fpn_feat`` mapping is not
    modified.

    Args:
        conv_fpn_feat: mapping from ``"stride<N>"`` to the feature symbol of
            that level.
        proposal: RoI symbol.

    Returns:
        The summed RoI feature symbol.
    """
    p = self.p
    rcnn_stride = p.stride

    group = mx.symbol.Custom(
        op_type="assign_layer_fpn",
        rois=proposal,
        rcnn_stride=rcnn_stride,
        roi_canonical_scale=p.roi_canonical_scale,
        roi_canonical_level=p.roi_canonical_level,
        name="assign_layer_fpn"
    )
    proposal_fpn = {
        "stride%s" % stride: group[i] for i, stride in enumerate(rcnn_stride)
    }

    fpn_roi_feats = []
    for stride in rcnn_stride:
        key = "stride%s" % stride
        feat_lvl = conv_fpn_feat[key]
        if p.fp16:
            # Format the stride into the name so each level's cast is
            # distinct (the literal "%s" used to be left in the name).
            feat_lvl = X.to_fp32(
                feat_lvl,
                name="fpn_stride%s_to_fp32" % stride
            )
        roi_feat = X.roi_align(
            feat_lvl,
            rois=proposal_fpn[key],
            out_size=p.out_size,
            stride=stride,
            name="roi_align"
        )
        roi_feat = X.reshape(
            data=roi_feat,
            shape=(-3, -2),
            name='roi_feat_reshape'
        )
        fpn_roi_feats.append(roi_feat)

    roi_feat = X.add_n(*fpn_roi_feats)

    if p.fp16:
        roi_feat = X.to_fp16(roi_feat, name="roi_feat_to_fp16")

    return roi_feat
def _get_bbox_head_logit(self, conv_feat):
    """Build this stage's two-fc head feature with explicit weight/bias symbols.

    The cached-result shortcut is intentionally not used here (disabled for
    re-inference in the test stage), so the layers are rebuilt on every call;
    the result is still stored in ``self._head_feat``.

    Raises:
        NotImplementedError: when the normalizer is not ``fix_bn``,
            ``sync_bn`` or ``gn``.
    """
    stage = self.stage
    normalizer = self.p.normalizer

    flat = X.flatten(conv_feat, name="bbox_feat_flatten_" + stage)
    flat_4d = X.reshape(flat, (0, 0, 1, 1), name="bbox_feat_reshape_" + stage)

    if normalizer.__name__ == "fix_bn":
        with_norm = False
    elif normalizer.__name__ in ["sync_bn", "gn"]:
        with_norm = True
    else:
        raise NotImplementedError("Unsupported normalizer: {}".format(
            normalizer.__name__))

    def fc(data, weight, bias, name):
        # one 1024-filter fc unit matching the configured normalizer
        kwargs = dict(filter=1024, weight=weight, bias=bias,
                      no_bias=False, name=name)
        if with_norm:
            return X.convnormrelu(normalizer, data, **kwargs)
        return X.convrelu(data, **kwargs)

    fc1 = fc(flat_4d, self.fc1_weight, self.fc1_bias, "bbox_fc1_" + stage)
    fc2 = fc(fc1, self.fc2_weight, self.fc2_bias, "bbox_fc2_" + stage)

    self._head_feat = fc2
    return self._head_feat
def _get_bbox_head_logit(self, conv_feat):
    """Build the two-fc head feature with ``self.add_norm``, cached on the instance.

    Each fc is an ``X.conv`` with 1024 filters and a shared Xavier
    initializer, followed by ``self.add_norm`` and ``X.relu``.
    """
    cached = self._head_feat
    if cached is not None:
        return cached

    init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    feat = X.reshape(conv_feat, shape=(0, -1, 1, 1), name="bbox_feat_reshape")
    for fc_name in ("bbox_fc1", "bbox_fc2"):
        feat = X.conv(feat, filter=1024, name=fc_name, init=init)
        feat = self.add_norm(feat)
        feat = X.relu(feat)

    self._head_feat = feat
    return self._head_feat
def get_roi_feature(self, rcnn_feat, proposal):
    """Extract RoI features from a single feature map.

    When ``p.fp16`` is set the feature is cast with ``X.to_fp32`` before
    ``X.roi_align`` and the result is cast back with ``X.to_fp16``. The two
    leading axes of the result are merged before returning.
    """
    cfg = self.p
    use_fp16 = cfg.fp16

    feat = X.to_fp32(rcnn_feat, "rcnn_feat_to_fp32") if use_fp16 else rcnn_feat

    pooled = X.roi_align(
        feat,
        rois=proposal,
        out_size=cfg.out_size,
        stride=cfg.stride,
        name="roi_align",
    )

    if use_fp16:
        pooled = X.to_fp16(pooled, "roi_feat_to_fp16")

    return X.reshape(pooled, (-3, -2))
def _convs_and_fcs(self, x, num_convs, num_fcs, name, conv_init, fc_init):
    """Apply ``num_convs`` 3x3 conv+relu layers, then ``num_fcs`` 1x1 conv+relu layers.

    Args:
        x: [N, C, H, W] feature maps
        num_convs: int
        num_fcs: int
        name: prefix for the layer names
        conv_init: mx initializer for the conv layers
        fc_init: mx initializer for the fc layers

    Returns:
        x: [N, C, H, W] or [N, C, 1, 1]; the input itself when both counts
        are zero.
    """
    if num_convs == 0 and num_fcs == 0:
        return x

    tsd_cfg = self.p.TSD
    conv_channels = tsd_cfg.conv_out_channels
    fc_channels = tsd_cfg.fc_out_channels

    out = x
    for idx in range(num_convs):
        conv = X.conv(
            out,
            kernel=3,
            filter=conv_channels,
            no_bias=False,
            name=name + '_conv%s' % idx,
            init=conv_init,
        )
        out = X.relu(conv)

    if num_fcs > 0:
        # collapse everything after the batch axis into channels before the fcs
        out = X.reshape(out, shape=(0, -1, 1, 1), name=name + '_conv_fc_flatten')
        for idx in range(num_fcs):
            fc = X.conv(
                out,
                kernel=1,
                filter=fc_channels,
                no_bias=False,
                name=name + '_fc%s' % idx,
                init=fc_init,
            )
            out = X.relu(fc)

    return out
def _get_bbox_head_logit(self, conv_feat):
    """Build this stage's two-fc head feature with explicit weight symbols.

    The cached-result shortcut is not used here, so the layers are rebuilt
    on every call; the result is still stored in ``self._head_feat``.

    Args:
        conv_feat: RoI feature symbol.

    Returns:
        The output symbol of the second ReLU.
    """
    stage = self.stage

    flatten = X.flatten(conv_feat, name="bbox_feat_flatten_" + stage)
    reshape = X.reshape(flatten, (0, 0, 1, 1), name="bbox_feat_reshape_" + stage)

    fc1 = X.conv(reshape, filter=1024, weight=self.fc1_weight,
                 name="bbox_fc1_" + stage)
    fc1_relu = X.relu(fc1, name="bbox_fc1_relu_" + stage)

    fc2 = X.conv(fc1_relu, filter=1024, weight=self.fc2_weight,
                 name="bbox_fc2_" + stage)
    # Name the activation like the fc1 one; it used to reuse the exact name
    # of the fc2 conv, giving two nodes the same name.
    fc2_relu = X.relu(fc2, name="bbox_fc2_relu_" + stage)

    self._head_feat = fc2_relu
    return self._head_feat
def get_roi_feature(self, conv_fpn_feat, proposal):
    """Extract RoI features from every FPN level and sum them.

    Outputs 1 to 4 of the ``assign_layer_fpn`` custom op are used as the RoIs
    for strides 4, 8, 16 and 32 respectively. Each level in ``p.stride`` is
    RoI-aligned with its RoIs, reshaped, and all levels are summed with
    ``X.add_n``.

    When ``p.fp16`` is set, each level is cast with ``X.to_fp32`` before RoI
    align and the summed result is cast back with ``X.to_fp16``. The cast
    result is kept local, so the caller's ``conv_fpn_feat`` mapping is not
    modified.

    Args:
        conv_fpn_feat: mapping from ``"stride<N>"`` to the feature symbol of
            that level.
        proposal: RoI symbol.

    Returns:
        The summed RoI feature symbol.
    """
    p = self.p
    rcnn_stride = p.stride

    group = mx.symbol.Custom(rois=proposal, op_type='assign_layer_fpn')
    # output 0 of the op is not used here
    proposal_fpn = {
        "stride4": group[1],
        "stride8": group[2],
        "stride16": group[3],
        "stride32": group[4],
    }

    fpn_roi_feats = []
    for stride in rcnn_stride:
        key = "stride%s" % stride
        feat_lvl = conv_fpn_feat[key]
        if p.fp16:
            # Format the stride into the name so each level's cast is
            # distinct (the literal "%s" used to be left in the name).
            feat_lvl = X.to_fp32(
                feat_lvl,
                name="fpn_stride%s_to_fp32" % stride)
        roi_feat = X.roi_align(
            feat_lvl,
            rois=proposal_fpn[key],
            out_size=p.out_size,
            stride=stride,
            name="roi_align")
        roi_feat = X.reshape(
            data=roi_feat,
            shape=(-3, -2),
            name='roi_feat_reshape')
        fpn_roi_feats.append(roi_feat)

    roi_feat = X.add_n(*fpn_roi_feats)

    if p.fp16:
        roi_feat = X.to_fp16(roi_feat, name="roi_feat_to_fp16")

    return roi_feat
def get_all_proposal(self, conv_fpn_feat, im_info):
    """Generate proposals on every FPN level and keep the top ones, cached on the instance.

    For each stride a ``Proposal_v3`` symbol is built. When
    ``p.nnvm_proposal`` is set and the stride is below ``rpn_stride[-2]``,
    its outputs are replaced by the ``mxnext.tvm`` proposal op. Per-level
    boxes and scores are concatenated along axis 1 and reduced with
    ``get_top_proposal``. The result is stored in ``self._proposal`` and
    returned directly on later calls.
    """
    cached = self._proposal
    if cached is not None:
        return cached

    p = self.p
    anchor_cfg = p.anchor_generate
    proposal_cfg = p.proposal

    rpn_stride = anchor_cfg.stride
    scales = tuple(anchor_cfg.scale)
    ratios = tuple(anchor_cfg.ratio)
    pre_nms_top_n = proposal_cfg.pre_nms_top_n
    post_nms_top_n = proposal_cfg.post_nms_top_n
    nms_thr = proposal_cfg.nms_thr
    min_bbox_side = proposal_cfg.min_bbox_side
    num_anchors = len(anchor_cfg.ratio) * len(anchor_cfg.scale)
    batch_size = p.batch_image

    logit_by_stride, delta_by_stride = self.get_output(conv_fpn_feat)

    # rpn rois for multi level feature
    level_boxes = []
    level_scores = []
    for stride in rpn_stride:
        level_logit = logit_by_stride[stride]
        level_delta = delta_by_stride[stride]

        # ROI Proposal
        split_logit = X.reshape(
            data=level_logit,
            shape=(0, 2, -1, 0),
            name="rpn_cls_logit_reshape_stride%s" % stride,
        )
        level_prob = mx.symbol.SoftmaxActivation(
            data=split_logit,
            mode="channel",
            name="rpn_cls_score_stride%s" % stride,
        )
        folded_prob = X.reshape(
            data=level_prob,
            shape=(0, 2 * num_anchors, -1, 0),
            name="rpn_cls_score_reshape_stride%s" % stride,
        )

        boxes, scores = mx.sym.contrib.Proposal_v3(
            cls_prob=folded_prob,
            bbox_pred=level_delta,
            im_info=im_info,
            rpn_pre_nms_top_n=pre_nms_top_n,
            rpn_post_nms_top_n=post_nms_top_n,
            feature_stride=stride,
            output_score=True,
            scales=scales,
            ratios=ratios,
            rpn_min_size=min_bbox_side,
            threshold=nms_thr,
            iou_loss=False,
        )

        if p.nnvm_proposal and stride < rpn_stride[-2]:
            max_side = anchor_cfg.max_side
            assert max_side is not None, "nnvm proposal requires max_side of image"
            # imported lazily: only needed when the nnvm proposal is enabled
            from mxnext.tvm.proposal import proposal as Proposal
            anchors = self.anchor_dict["stride%s" % stride]
            boxes, scores = Proposal(
                cls_prob=folded_prob,
                bbox_pred=level_delta,
                im_info=im_info,
                anchors=anchors,
                name='proposal',
                feature_stride=stride,
                scales=scales,
                ratios=ratios,
                rpn_pre_nms_top_n=pre_nms_top_n,
                rpn_post_nms_top_n=post_nms_top_n,
                threshold=nms_thr,
                batch_size=batch_size,
                max_side=max_side,
                output_score=True,
                variant="simpledet",
            )

        level_boxes.append(boxes)
        level_scores.append(scores)

    # concat output rois of each level
    all_boxes = X.concat(level_boxes, axis=1, name="proposal_concat")
    all_scores = X.concat(level_scores, axis=1, name="proposal_scores_concat")

    from mxnext.tvm.get_top_proposal import get_top_proposal
    result = get_top_proposal(
        mx.symbol,
        bbox=all_boxes,
        score=all_scores,
        top_n=post_nms_top_n,
        batch_size=batch_size,
    )

    self._proposal = result
    return result