Example #1
    def get_prediction(self, conv_feat, im_info, proposal):
        p = self.p
        bbox_mean = p.regress_target.mean
        bbox_std = p.regress_target.std
        batch_image = p.batch_image
        num_class = p.num_class
        class_agnostic = p.regress_target.class_agnostic
        num_reg_class = 2 if class_agnostic else num_class

        cls_logit, bbox_delta = self.get_output(conv_feat)

        bbox_delta = X.reshape(bbox_delta,
                               shape=(batch_image, -1, 4 * num_reg_class),
                               name='bbox_delta_reshape')

        bbox_xyxy = X.decode_bbox(rois=proposal,
                                  bbox_pred=bbox_delta,
                                  im_info=im_info,
                                  name='decode_bbox',
                                  bbox_mean=bbox_mean,
                                  bbox_std=bbox_std,
                                  class_agnostic=class_agnostic)
        cls_score = X.softmax(cls_logit, axis=-1, name='bbox_cls_score')
        cls_score = X.reshape(cls_score,
                              shape=(batch_image, -1, num_class),
                              name='bbox_cls_score_reshape')
        return cls_score, bbox_xyxy
Example #2
    def get_sampled_proposal_with_filter(self, conv_fpn_feat, gt_bbox, gt_poly,
                                         im_info, valid_ranges):
        p = self.p

        batch_image = p.batch_image

        proposal_wo_gt = p.subsample_proposal.proposal_wo_gt
        image_roi = p.subsample_proposal.image_roi
        fg_fraction = p.subsample_proposal.fg_fraction
        fg_thr = p.subsample_proposal.fg_thr
        bg_thr_hi = p.subsample_proposal.bg_thr_hi
        bg_thr_lo = p.subsample_proposal.bg_thr_lo
        post_nms_top_n = p.proposal.post_nms_top_n

        num_reg_class = p.bbox_target.num_reg_class
        class_agnostic = p.bbox_target.class_agnostic
        bbox_target_weight = p.bbox_target.weight
        bbox_target_mean = p.bbox_target.mean
        bbox_target_std = p.bbox_target.std

        mask_size = self.pMask.resolution

        (proposal, proposal_score) = self.get_all_proposal_with_filter(
            conv_fpn_feat, im_info, valid_ranges)

        (bbox, label, bbox_target, bbox_weight, match_gt_iou,
         mask_target) = mx.sym.ProposalMaskTarget(
             proposal,
             gt_bbox,
             gt_poly,
             valid_ranges,
             mask_size=mask_size,
             num_classes=num_reg_class,
             class_agnostic=class_agnostic,
             batch_images=batch_image,
             proposal_without_gt=proposal_wo_gt,
             image_rois=image_roi,
             fg_fraction=fg_fraction,
             fg_thresh=fg_thr,
             bg_thresh_hi=bg_thr_hi,
             bg_thresh_lo=bg_thr_lo,
             bbox_weight=bbox_target_weight,
             bbox_mean=bbox_target_mean,
             bbox_std=bbox_target_std,
             output_iou=True,
             filter_scales=True,
             name="subsample_proposal")

        label = X.reshape(label, (-3, -2))
        bbox_target = X.reshape(bbox_target, (-3, -2))
        bbox_weight = X.reshape(bbox_weight, (-3, -2))
        mask_target = X.reshape(mask_target, (-3, -2))

        num_fg_rois_per_img = int(image_roi * fg_fraction)
        mask_proposal = mx.sym.slice_axis(bbox,
                                          axis=1,
                                          begin=0,
                                          end=num_fg_rois_per_img)

        return bbox, label, bbox_target, bbox_weight, mask_proposal, mask_target
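Note: the `X.reshape(label, (-3, -2))` calls above use MXNet's special reshape codes: -3 merges two consecutive axes into one and -2 copies the remaining axes unchanged, which folds the per-image RoI axis into the batch axis. A minimal sketch with illustrative shapes only (2 images, 128 sampled RoIs):

    import mxnet as mx

    # hypothetical (batch_image, image_roi, 4) tensor; shapes chosen for illustration
    bbox_target_like = mx.nd.zeros((2, 128, 4))
    folded = mx.nd.reshape(bbox_target_like, shape=(-3, -2))  # -3 merges axes 0 and 1, -2 keeps the rest
    print(folded.shape)  # (256, 4)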
Example #3
    def get_train_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch, scaleaware):
        gt_bbox = X.var("gt_bbox")
        im_info = X.var("im_info")
        if scaleaware:
            valid_ranges = X.var("valid_ranges")
        rpn_cls_label = X.var("rpn_cls_label")
        rpn_reg_target = X.var("rpn_reg_target")
        rpn_reg_weight = X.var("rpn_reg_weight")

        im_info = TridentResNetV2Builder.stack_branch_symbols([im_info] * num_branch)
        gt_bbox = TridentResNetV2Builder.stack_branch_symbols([gt_bbox] * num_branch)
        if scaleaware:
            valid_ranges = X.reshape(valid_ranges, (-3, -2))
        rpn_cls_label = X.reshape(rpn_cls_label, (-3, -2))
        rpn_reg_target = X.reshape(rpn_reg_target, (-3, -2))
        rpn_reg_weight = X.reshape(rpn_reg_weight, (-3, -2))

        rpn_feat = backbone.get_rpn_feature()
        rcnn_feat = backbone.get_rcnn_feature()
        rpn_feat = neck.get_rpn_feature(rpn_feat)
        rcnn_feat = neck.get_rcnn_feature(rcnn_feat)

        rpn_loss = rpn_head.get_loss(rpn_feat, rpn_cls_label, rpn_reg_target, rpn_reg_weight)
        if scaleaware:
            proposal, bbox_cls, bbox_target, bbox_weight = rpn_head.get_sampled_proposal_with_filter(rpn_feat, gt_bbox, im_info, valid_ranges)
        else:
            proposal, bbox_cls, bbox_target, bbox_weight = rpn_head.get_sampled_proposal(rpn_feat, gt_bbox, im_info)
        roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal)
        bbox_loss = bbox_head.get_loss(roi_feat, bbox_cls, bbox_target, bbox_weight)

        return X.group(rpn_loss + bbox_loss)
Example #4
    def get_loss(self, conv_feat, mask_target):
        pBbox = self.pBbox
        pMask = self.pMask
        batch_image = pBbox.batch_image

        mask_fcn_logit = self.get_output(conv_feat)

        scale_loss_shift = 128.0 if pMask.fp16 else 1.0

        mask_fcn_logit = X.reshape(
            mask_fcn_logit,
            shape=(1, -1),
            name="mask_fcn_logit_reshape"
        )
        mask_target = X.reshape(
            mask_target,
            shape=(1, -1),
            name="mask_target_reshape"
        )
        mask_loss = mx.sym.contrib.SigmoidCrossEntropy(
            mask_fcn_logit,
            mask_target,
            grad_scale=1.0 * scale_loss_shift,
            name="mask_loss"
        )
        return (mask_loss,)
Example #5
    def get_test_symbol(backbone, neck, rpn_head, roi_extractor,
                        mask_roi_extractor, bbox_head, mask_head,
                        bbox_post_processor, num_branch):
        rec_id, im_id, im_info, proposal, proposal_score = \
            TridentFasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head, num_branch)

        im_info_branches = TridentResNetV2Builder.stack_branch_symbols(
            [im_info] * num_branch)

        rcnn_feat = backbone.get_rcnn_feature()
        rcnn_feat = neck.get_rcnn_feature(rcnn_feat)

        roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal)
        cls_score, bbox_xyxy = bbox_head.get_prediction(
            roi_feat, im_info_branches, proposal)
        post_cls_score, post_bbox_xyxy, post_cls = bbox_post_processor.get_post_processing(
            cls_score, bbox_xyxy)

        mask_roi_feat = mask_roi_extractor.get_roi_feature(
            rcnn_feat, post_bbox_xyxy)
        mask = mask_head.get_prediction(mask_roi_feat)

        # fold batch size into roi size for trident only
        post_cls_score = X.reshape(post_cls_score, (-3, -2),
                                   name="post_cls_score_fold")
        post_bbox_xyxy = X.reshape(post_bbox_xyxy, (-3, -2),
                                   name="post_bbox_xyxy_fold")
        post_cls = X.reshape(post_cls, (-3, -2), name="post_cls_fold")

        return X.group([
            rec_id, im_id, im_info, post_cls_score, post_bbox_xyxy, post_cls,
            mask
        ])
Example #6
    def get_loss(self, conv_fpn_feat, cls_label, bbox_target, bbox_weight):
        p = self.p
        batch_image = p.batch_image
        image_anchor = p.anchor_generate.image_anchor
        rpn_stride = p.anchor_generate.stride

        cls_logit_dict, bbox_delta_dict = self.get_output(conv_fpn_feat)

        scale_loss_shift = 128.0 if p.fp16 else 1.0

        rpn_cls_logit_list = []
        rpn_bbox_delta_list = []

        for stride in rpn_stride:
            rpn_cls_logit = cls_logit_dict[stride]
            rpn_bbox_delta = bbox_delta_dict[stride]
            rpn_cls_logit_reshape = X.reshape(
                data=rpn_cls_logit,
                shape=(0, 2, -1),
                name="rpn_cls_score_reshape_stride%s" % stride
            )
            rpn_bbox_delta_reshape = X.reshape(
                data=rpn_bbox_delta,
                shape=(0, 0, -1),
                name="rpn_bbox_pred_reshape_stride%s" % stride
            )
            rpn_bbox_delta_list.append(rpn_bbox_delta_reshape)
            rpn_cls_logit_list.append(rpn_cls_logit_reshape)

        # concat output of each level
        rpn_bbox_delta_concat = X.concat(rpn_bbox_delta_list, axis=2, name="rpn_bbox_pred_concat")
        rpn_cls_logit_concat = X.concat(rpn_cls_logit_list, axis=2, name="rpn_cls_score_concat")

        cls_loss = X.softmax_output(
            data=rpn_cls_logit_concat,
            label=cls_label,
            multi_output=True,
            normalization='valid',
            use_ignore=True,
            ignore_label=-1,
            grad_scale=1.0 * scale_loss_shift,
            name="rpn_cls_loss"
        )

        # regression loss
        reg_loss = X.smooth_l1(
            (rpn_bbox_delta_concat - bbox_target),
            scalar=3.0,
            name='rpn_reg_l1'
        )
        reg_loss = bbox_weight * reg_loss
        reg_loss = X.loss(
            reg_loss,
            grad_scale=1.0 / (batch_image * image_anchor) * scale_loss_shift,
            name='rpn_reg_loss'
        )
        return cls_loss, reg_loss
Example #7
    def get_all_proposal_with_filter(self, conv_feat, im_info, valid_ranges):
        if self._proposal is not None:
            return self._proposal

        p = self.p
        rpn_stride = p.anchor_generate.stride
        anchor_scale = p.anchor_generate.scale
        anchor_ratio = p.anchor_generate.ratio
        pre_nms_top_n = p.proposal.pre_nms_top_n
        post_nms_top_n = p.proposal.post_nms_top_n
        nms_thr = p.proposal.nms_thr
        min_bbox_side = p.proposal.min_bbox_side

        cls_logit, bbox_delta = self.get_output(conv_feat)

        # TODO: remove this reshape hell
        cls_logit_reshape = X.reshape(
            cls_logit,
            shape=(0, -4, 2, -1, 0, 0),  # (N,C,H,W) -> (N,2,C/2,H,W)
            name="rpn_cls_logit_reshape_"
        )
        cls_score = X.softmax(
            cls_logit_reshape,
            axis=1,
            name='rpn_cls_score'
        )
        cls_logit_reshape = X.reshape(
            cls_score,
            shape=(0, -3, 0, 0),
            name='rpn_cls_score_reshape'
        )
        proposal = mx.sym.contrib.Proposal_v2(
            cls_prob=cls_logit_reshape,
            bbox_pred=bbox_delta,
            im_info=im_info,
            valid_ranges=valid_ranges,
            name='proposal',
            feature_stride=rpn_stride,
            scales=tuple(anchor_scale),
            ratios=tuple(anchor_ratio),
            rpn_pre_nms_top_n=pre_nms_top_n,
            rpn_post_nms_top_n=post_nms_top_n,
            threshold=nms_thr,
            rpn_min_size=min_bbox_side,
            iou_loss=False,
            filter_scales=True,
            output_score=True
        )

        self._proposal = proposal

        return proposal
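Note: the `(0, -4, 2, -1, 0, 0)` reshape splits the RPN channel axis so that the softmax runs over the two objectness classes of every anchor, and `(0, -3, 0, 0)` merges the split axes back. A minimal sketch with made-up sizes (1 image, 3 anchors, a 4x4 feature map):

    import mxnet as mx

    logit = mx.nd.random.normal(shape=(1, 2 * 3, 4, 4))        # (N, 2*A, H, W)
    split = mx.nd.reshape(logit, shape=(0, -4, 2, -1, 0, 0))   # (N, 2, A, H, W)
    prob = mx.nd.softmax(split, axis=1)                        # softmax over the fg/bg axis
    merged = mx.nd.reshape(prob, shape=(0, -3, 0, 0))          # back to (N, 2*A, H, W)
    print(merged.shape)  # (1, 6, 4, 4)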
Example #8
    def get_prediction(self,
                       rois,
                       roi_feat,
                       fpn_conv_feats,
                       im_info,
                       play=False):
        '''
        Args:
            rois: [batch_image, image_roi, 4]
            roi_feat: [batch_image * image_roi, 256, roi_size, roi_size]
            fpn_conv_feats: dict of FPN features, each [batch_image, in_channels, fh, fw]
            im_info: ...
        Returns:
            cls_score: [batch_image, image_roi, num_class]
            bbox_xyxy: [batch_image, image_roi, num_class*4]
        '''
        p = self.p
        assert not p.regress_target.class_agnostic

        bbox_mean = p.regress_target.mean
        bbox_std = p.regress_target.std
        batch_image = p.batch_image
        num_class = p.num_class
        class_agnostic = p.regress_target.class_agnostic
        num_reg_class = num_class

        assert batch_image == 1

        cls_logit, bbox_delta, tsd_cls_logit, tsd_bbox_delta, delta_c, delta_r = self.get_output(
            fpn_conv_feats, roi_feat, rois, is_train=False)
        rois_r = self._get_delta_r_box(delta_r, rois)

        bbox_xyxy = X.decode_bbox(
            rois=rois_r,
            bbox_pred=X.reshape(tsd_bbox_delta,
                                (batch_image, -1, 4 * num_reg_class)),
            im_info=im_info,
            name='decode_bbox',
            bbox_mean=bbox_mean,
            bbox_std=bbox_std,
            class_agnostic=False)
        cls_score = X.reshape(X.softmax(tsd_cls_logit,
                                        axis=-1,
                                        name='bbox_cls_score'),
                              shape=(batch_image, -1, num_class),
                              name='bbox_cls_score_reshape')

        if not play:
            return cls_score, bbox_xyxy
        else:
            return cls_score, bbox_xyxy, rois, rois_r
Example #9
    def get_train_symbol(cls, backbone, neck, rpn_head):
        rpn_cls_label = X.var("rpn_cls_label")
        rpn_reg_target = X.var("rpn_reg_target")
        rpn_reg_weight = X.var("rpn_reg_weight")

        rpn_cls_label = X.reshape(rpn_cls_label, (-3, -2))
        rpn_reg_target = X.reshape(rpn_reg_target, (-3, -2))
        rpn_reg_weight = X.reshape(rpn_reg_weight, (-3, -2))

        rpn_feat = backbone.get_rpn_feature()
        rpn_feat = neck.get_rpn_feature(rpn_feat)
        rpn_loss = rpn_head.get_loss(rpn_feat, rpn_cls_label, rpn_reg_target, rpn_reg_weight)

        return X.group(rpn_loss)
Example #10
    def get_loss(self, conv_feat, cls_label, bbox_target, bbox_weight):
        p = self.p
        batch_roi = p.image_roi * p.batch_image
        batch_image = p.batch_image

        cls_logit, bbox_delta = self.get_output(conv_feat)

        scale_loss_shift = 128.0 if p.fp16 else 1.0

        # classification loss
        cls_loss = X.softmax_output(data=cls_logit,
                                    label=cls_label,
                                    normalization='batch',
                                    grad_scale=1.0 * scale_loss_shift,
                                    name='bbox_cls_loss')

        # bounding box regression
        reg_loss = X.smooth_l1(bbox_delta - bbox_target,
                               scalar=1.0,
                               name='bbox_reg_l1')
        reg_loss = bbox_weight * reg_loss
        reg_loss = X.loss(
            reg_loss,
            grad_scale=1.0 / batch_roi * scale_loss_shift,
            name='bbox_reg_loss',
        )

        # append label
        cls_label = X.reshape(cls_label,
                              shape=(batch_image, -1),
                              name='bbox_label_reshape')
        cls_label = X.block_grad(cls_label, name='bbox_label_blockgrad')

        # output
        return cls_loss, reg_loss, cls_label
Example #11
    def get_all_proposal(self, rois, bbox_pred, im_info):
        if self._proposal is not None:
            return self._proposal

        p = self.p
        stage = self.stage
        batch_image = p.batch_image
        bbox_mean = p.regress_target.mean
        bbox_std = p.regress_target.std
        num_class = p.num_class
        class_agnostic = p.regress_target.class_agnostic
        num_reg_class = 2 if class_agnostic else num_class

        bbox_pred = X.reshape(bbox_pred,
                              shape=(batch_image, -1, 4 * num_reg_class),
                              name='bbox_delta_reshape_' + stage)

        proposal = X.decode_bbox(rois=rois,
                                 bbox_pred=bbox_pred,
                                 im_info=im_info,
                                 name='decode_bbox_' + stage,
                                 bbox_mean=bbox_mean,
                                 bbox_std=bbox_std,
                                 class_agnostic=class_agnostic)

        # append None for dummy proposal score
        proposal = (proposal, None)

        self._proposal = proposal

        return proposal
Example #12
    def get_loss(self, conv_feat, cls_label, bbox_target, bbox_weight):
        p = self.p
        batch_image = p.batch_image
        image_anchor = p.anchor_generate.image_anchor

        cls_logit, bbox_delta = self.get_output(conv_feat)

        scale_loss_shift = 128.0 if p.fp16 else 1.0

        # classification loss
        cls_logit_reshape = X.reshape(
            cls_logit,
            shape=(0, -4, 2, -1, 0, 0),  # (N,C,H,W) -> (N,2,C/2,H,W)
            name="rpn_cls_logit_reshape")
        cls_loss = X.softmax_output(data=cls_logit_reshape,
                                    label=cls_label,
                                    multi_output=True,
                                    normalization='valid',
                                    use_ignore=True,
                                    ignore_label=-1,
                                    grad_scale=1.0 * scale_loss_shift,
                                    name="rpn_cls_loss")

        # regression loss
        reg_loss = X.smooth_l1((bbox_delta - bbox_target),
                               scalar=3.0,
                               name='rpn_reg_l1')
        reg_loss = bbox_weight * reg_loss
        reg_loss = X.loss(reg_loss,
                          grad_scale=1.0 / (batch_image * image_anchor) *
                          scale_loss_shift,
                          name='rpn_reg_loss')

        return cls_loss, reg_loss
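Note: the regression gradient scale normalizes the smooth-L1 loss over all sampled anchors in the batch and is further multiplied by the fp16 loss-scaling factor. With illustrative values (not taken from any particular config):

    batch_image, image_anchor = 2, 256        # illustrative values only
    scale_loss_shift = 128.0                  # applies only when fp16 is enabled
    grad_scale = 1.0 / (batch_image * image_anchor) * scale_loss_shift
    print(grad_scale)  # 0.25, i.e. (1 / 512) * 128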
Example #13
    def get_test_symbol(cls, backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch):
        rec_id, im_id, im_info, proposal, proposal_score = \
            TridentFasterRcnn.get_rpn_test_symbol(backbone, neck, rpn_head, num_branch)

        im_info_branches = TridentResNetV2Builder.stack_branch_symbols([im_info] * num_branch)

        rcnn_feat = backbone.get_rcnn_feature()
        rcnn_feat = neck.get_rcnn_feature(rcnn_feat)

        roi_feat = roi_extractor.get_roi_feature(rcnn_feat, proposal)
        cls_score, bbox_xyxy = bbox_head.get_prediction(roi_feat, im_info_branches, proposal)

        cls_score = X.reshape(cls_score, (-3, -2))
        bbox_xyxy = X.reshape(bbox_xyxy, (-3, -2))

        return X.group([rec_id, im_id, im_info, cls_score, bbox_xyxy])
Example #14
    def _get_bbox_head_logit(self, conv_feat):
        if self._head_feat is not None:
            return self._head_feat

        p = self.p

        if p.normalizer.__name__ == "fix_bn":
            conv_feat = X.convrelu(conv_feat, filter=256, kernel=3, name="bbox_conv1")
            conv_feat = X.convrelu(conv_feat, filter=256, kernel=3, name="bbox_conv2")
            conv_feat = X.convrelu(conv_feat, filter=256, kernel=3, name="bbox_conv3")
            conv_feat = X.convrelu(conv_feat, filter=256, kernel=3, name="bbox_conv4")
        elif p.normalizer.__name__ in ["sync_bn", "gn"]:
            conv_feat = X.convnormrelu(p.normalizer, conv_feat, filter=256, kernel=3, name="bbox_conv1")
            conv_feat = X.convnormrelu(p.normalizer, conv_feat, filter=256, kernel=3, name="bbox_conv2")
            conv_feat = X.convnormrelu(p.normalizer, conv_feat, filter=256, kernel=3, name="bbox_conv3")
            conv_feat = X.convnormrelu(p.normalizer, conv_feat, filter=256, kernel=3, name="bbox_conv4")
        else:
            raise NotImplementedError("Unsupported normalizer: {}".format(p.normalizer.__name__))

        flatten = X.flatten(conv_feat, name="bbox_feat_flatten")
        reshape = X.reshape(flatten, (0, 0, 1, 1), name="bbox_feat_reshape")

        if p.normalizer.__name__ == "fix_bn":
            fc1 = X.convrelu(reshape, filter=1024, name="bbox_fc1")
        elif p.normalizer.__name__ in ["sync_bn", "gn"]:
            fc1 = X.convnormrelu(p.normalizer, reshape, filter=1024, name="bbox_fc1")
        else:
            raise NotImplementedError("Unsupported normalizer: {}".format(p.normalizer.__name__))

        self._head_feat = fc1

        return self._head_feat
Example #15
    def get_sampled_proposal_with_filter(self, conv_feat, gt_bbox, im_info,
                                         valid_ranges):
        p = self.p

        batch_image = p.batch_image

        proposal_wo_gt = p.subsample_proposal.proposal_wo_gt
        image_roi = p.subsample_proposal.image_roi
        fg_fraction = p.subsample_proposal.fg_fraction
        fg_thr = p.subsample_proposal.fg_thr
        bg_thr_hi = p.subsample_proposal.bg_thr_hi
        bg_thr_lo = p.subsample_proposal.bg_thr_lo

        num_reg_class = p.bbox_target.num_reg_class
        class_agnostic = p.bbox_target.class_agnostic
        bbox_target_weight = p.bbox_target.weight
        bbox_target_mean = p.bbox_target.mean
        bbox_target_std = p.bbox_target.std

        (proposal, proposal_score) = self.get_all_proposal_with_filter(
            conv_feat, im_info, valid_ranges)

        (bbox, label, bbox_target, bbox_weight) = mx.sym.ProposalTarget_v2(
            rois=proposal,
            gt_boxes=gt_bbox,
            valid_ranges=valid_ranges,
            num_classes=num_reg_class,
            class_agnostic=class_agnostic,
            batch_images=batch_image,
            proposal_without_gt=proposal_wo_gt,
            image_rois=image_roi,
            fg_fraction=fg_fraction,
            fg_thresh=fg_thr,
            bg_thresh_hi=bg_thr_hi,
            bg_thresh_lo=bg_thr_lo,
            bbox_weight=bbox_target_weight,
            bbox_mean=bbox_target_mean,
            bbox_std=bbox_target_std,
            filter_scales=True,
            name="subsample_proposal")

        label = X.reshape(label, (-3, -2))
        bbox_target = X.reshape(bbox_target, (-3, -2))
        bbox_weight = X.reshape(bbox_weight, (-3, -2))

        return bbox, label, bbox_target, bbox_weight
Example #16
    def get_sampled_proposal(self, conv_fpn_feat, gt_bbox, im_info):
        p = self.p

        batch_image = p.batch_image

        proposal_wo_gt = p.subsample_proposal.proposal_wo_gt
        image_roi = p.subsample_proposal.image_roi
        fg_fraction = p.subsample_proposal.fg_fraction
        fg_thr = p.subsample_proposal.fg_thr
        bg_thr_hi = p.subsample_proposal.bg_thr_hi
        bg_thr_lo = p.subsample_proposal.bg_thr_lo
        post_nms_top_n = p.proposal.post_nms_top_n

        num_reg_class = p.bbox_target.num_reg_class
        class_agnostic = p.bbox_target.class_agnostic
        bbox_target_weight = p.bbox_target.weight
        bbox_target_mean = p.bbox_target.mean
        bbox_target_std = p.bbox_target.std

        (proposal,
         proposal_score) = self.get_all_proposal(conv_fpn_feat, im_info)

        (bbox, label, bbox_target,
         bbox_weight) = X.proposal_target(rois=proposal,
                                          gt_boxes=gt_bbox,
                                          num_classes=num_reg_class,
                                          class_agnostic=class_agnostic,
                                          batch_images=batch_image,
                                          proposal_without_gt=proposal_wo_gt,
                                          image_rois=image_roi,
                                          fg_fraction=fg_fraction,
                                          fg_thresh=fg_thr,
                                          bg_thresh_hi=bg_thr_hi,
                                          bg_thresh_lo=bg_thr_lo,
                                          bbox_weight=bbox_target_weight,
                                          bbox_mean=bbox_target_mean,
                                          bbox_std=bbox_target_std,
                                          name="subsample_proposal")

        label = X.reshape(label, (-3, -2))
        bbox_target = X.reshape(bbox_target, (-3, -2))
        bbox_weight = X.reshape(bbox_weight, (-3, -2))

        return bbox, label, bbox_target, bbox_weight
Example #17
    def get_sampled_proposal(self, rois, bbox_pred, gt_bbox, im_info):
        p = self.p
        stage = self.stage

        batch_image = p.batch_image

        proposal_wo_gt = p.subsample_proposal.proposal_wo_gt
        image_roi = -1  # do not subsample rois
        fg_fraction = p.subsample_proposal.fg_fraction
        fg_thr = p.subsample_proposal.fg_thr
        bg_thr_hi = p.subsample_proposal.bg_thr_hi
        bg_thr_lo = p.subsample_proposal.bg_thr_lo

        num_reg_class = p.bbox_target.num_reg_class
        class_agnostic = p.bbox_target.class_agnostic
        bbox_target_weight = p.bbox_target.weight
        bbox_target_mean = p.bbox_target.mean
        bbox_target_std = p.bbox_target.std

        proposal = self.get_all_proposal(rois, bbox_pred, im_info)

        (bbox, label, bbox_target,
         bbox_weight) = X.proposal_target(rois=proposal,
                                          gt_boxes=gt_bbox,
                                          num_classes=num_reg_class,
                                          class_agnostic=class_agnostic,
                                          batch_images=batch_image,
                                          proposal_without_gt=proposal_wo_gt,
                                          image_rois=image_roi,
                                          fg_fraction=fg_fraction,
                                          fg_thresh=fg_thr,
                                          bg_thresh_hi=bg_thr_hi,
                                          bg_thresh_lo=bg_thr_lo,
                                          bbox_weight=bbox_target_weight,
                                          bbox_mean=bbox_target_mean,
                                          bbox_std=bbox_target_std,
                                          name="subsample_proposal_" + stage)

        label = X.reshape(label, (-3, -2))
        bbox_target = X.reshape(bbox_target, (-3, -2))
        bbox_weight = X.reshape(bbox_weight, (-3, -2))

        return bbox, label, bbox_target, bbox_weight
Example #18
    def get_all_proposal(self, conv_feat, im_info):
        if self._proposal is not None:
            return self._proposal

        p = self.p
        rpn_stride = p.anchor_generate.stride
        anchor_scale = p.anchor_generate.scale
        anchor_ratio = p.anchor_generate.ratio
        pre_nms_top_n = p.proposal.pre_nms_top_n
        post_nms_top_n = p.proposal.post_nms_top_n
        nms_thr = p.proposal.nms_thr
        min_bbox_side = p.proposal.min_bbox_side

        cls_logit, bbox_delta = self.get_output(conv_feat)

        # TODO: remove this reshape hell
        cls_logit_reshape = X.reshape(
            cls_logit,
            shape=(0, -4, 2, -1, 0, 0),  # (N,C,H,W) -> (N,2,C/2,H,W)
            name="rpn_cls_logit_reshape_")
        cls_score = X.softmax(cls_logit_reshape, axis=1, name='rpn_cls_score')
        cls_logit_reshape = X.reshape(cls_score,
                                      shape=(0, -3, 0, 0),
                                      name='rpn_cls_score_reshape')

        # TODO: ask all to add is_train filed in RPNParam
        proposal = X.proposal(cls_prob=cls_logit_reshape,
                              bbox_pred=bbox_delta,
                              im_info=im_info,
                              name='proposal',
                              feature_stride=rpn_stride,
                              scales=tuple(anchor_scale),
                              ratios=tuple(anchor_ratio),
                              rpn_pre_nms_top_n=pre_nms_top_n,
                              rpn_post_nms_top_n=post_nms_top_n,
                              threshold=nms_thr,
                              rpn_min_size=min_bbox_side,
                              iou_loss=False)

        self._proposal = proposal

        return proposal
Example #19
    def get_loss(self, conv_feat, mask_target, mask_ind):
        pBbox = self.pBbox
        pMask = self.pMask
        batch_image = pBbox.batch_image

        mask_fcn_logit = self.get_output(conv_feat)

        scale_loss_shift = 128.0 if pMask.fp16 else 1.0

        mask_fcn_logits = mx.sym.split(mask_fcn_logit, num_outputs=batch_image, axis=0)
        mask_inds = mx.sym.split(mask_ind, num_outputs=batch_image, axis=0, squeeze_axis=True)
        mask_fcn_logit_list = []
        for mask_fcn_logit, mask_ind in zip(mask_fcn_logits, mask_inds):
            batch_ind = mx.sym.arange(pMask.num_fg_roi)
            mask_ind = mx.sym.stack(batch_ind, mask_ind)
            mask_fcn_logit = mx.sym.gather_nd(mask_fcn_logit, mask_ind, axis=1)
            mask_fcn_logit_list.append(mask_fcn_logit)
        mask_fcn_logit = mx.sym.concat(*mask_fcn_logit_list, dim=0)

        # get mask prediction logits
        mask_pred_logits = mx.symbol.Activation(
            data=mask_fcn_logit,
            act_type='sigmoid',
            name='mask_pred_prob')

        mask_fcn_logit = X.reshape(
            mask_fcn_logit,
            shape=(1, -1),
            name="mask_fcn_logit_reshape"
        )
        mask_target = X.reshape(
            mask_target,
            shape=(1, -1),
            name="mask_target_reshape"
        )
        mask_loss = mx.sym.contrib.SigmoidCrossEntropy(
            mask_fcn_logit,
            mask_target,
            grad_scale=1.0 * scale_loss_shift,
            name="mask_loss"
        )
        return (mask_loss,), mask_pred_logits
Example #20
    def get_loss(self, conv_feat, gt_bboxes, im_infos, rpn_groups):
        p = self.p
        num_class = p.num_class
        batch_image = p.batch_image
        image_anchor = p.anchor_generate.image_anchor

        cls_logit, bbox_delta = self.get_output(conv_feat)

        scale_loss_shift = 128.0 if p.fp16 else 1.0

        cls_label = X.var("rpn_cls_label")
        bbox_target = X.var("rpn_reg_target")
        bbox_weight = X.var("rpn_reg_weight")

        # classification loss
        cls_logit_reshape = X.reshape(
            cls_logit,
            shape=(0, -4, num_class, -1, 0,
                   0),  # (N,C,H,W) -> (N,num_class,C/num_class,H,W)
            name="rpn_cls_logit_reshape")

        cls_loss = None
        if p.use_groupsoftmax:
            cls_loss = mx.sym.contrib.GroupSoftmaxOutput(
                data=cls_logit_reshape,
                label=cls_label,
                group=rpn_groups,
                multi_output=True,
                normalization='valid',
                use_ignore=True,
                ignore_label=-1,
                grad_scale=1.0 * scale_loss_shift,
                name="rpn_cls_loss")
        else:
            cls_loss = X.softmax_output(data=cls_logit_reshape,
                                        label=cls_label,
                                        multi_output=True,
                                        normalization='valid',
                                        use_ignore=True,
                                        ignore_label=-1,
                                        grad_scale=1.0 * scale_loss_shift,
                                        name="rpn_cls_loss")

        # regression loss
        reg_loss = X.smooth_l1((bbox_delta - bbox_target),
                               scalar=3.0,
                               name='rpn_reg_l1')
        reg_loss = bbox_weight * reg_loss
        reg_loss = X.loss(reg_loss,
                          grad_scale=1.0 / (batch_image * image_anchor) *
                          scale_loss_shift,
                          name='rpn_reg_loss')

        return cls_loss, reg_loss
Example #21
    def _get_bbox_head_logit(self, conv_feat):
        if self._head_feat is not None:
            return self._head_feat

        flatten = X.flatten(conv_feat, name="bbox_feat_flatten")
        reshape = X.reshape(flatten, (0, 0, 1, 1), name="bbox_feat_reshape")
        fc1 = X.convrelu(reshape, filter=1024, name="bbox_fc1")
        fc2 = X.convrelu(fc1, filter=1024, name="bbox_fc2")

        self._head_feat = fc2

        return self._head_feat
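Note: flattening the RoI feature to `(N, C*H*W, 1, 1)` and applying a 1x1 convolution is equivalent to a fully connected layer, which is why these heads implement `bbox_fc1`/`bbox_fc2` via `X.convrelu(filter=1024)` after `X.flatten` + `X.reshape`. A quick numerical check with hypothetical shapes (8 RoIs, 256x7x7 pooled features):

    import mxnet as mx

    x = mx.nd.random.normal(shape=(8, 256 * 7 * 7))
    w = mx.nd.random.normal(shape=(1024, 256 * 7 * 7))
    b = mx.nd.zeros((1024,))

    fc = mx.nd.FullyConnected(x, weight=w, bias=b, num_hidden=1024)
    conv = mx.nd.Convolution(x.reshape((8, -1, 1, 1)),
                             weight=w.reshape((1024, -1, 1, 1)),
                             bias=b, kernel=(1, 1), num_filter=1024)

    print(mx.nd.abs(fc - conv.reshape((8, 1024))).max())  # ~0, up to float error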
Example #22
    def get_sampled_proposal(self, conv_fpn_feat, gt_bbox, im_info):
        p = self.p

        batch_image = p.batch_image

        proposal_wo_gt = p.subsample_proposal.proposal_wo_gt
        image_roi = p.subsample_proposal.image_roi
        fg_fraction = p.subsample_proposal.fg_fraction
        fg_thr = p.subsample_proposal.fg_thr
        bg_thr_hi = p.subsample_proposal.bg_thr_hi
        bg_thr_lo = p.subsample_proposal.bg_thr_lo
        post_nms_top_n = p.proposal.post_nms_top_n

        num_reg_class = p.bbox_target.num_reg_class
        class_agnostic = p.bbox_target.class_agnostic
        bbox_target_weight = p.bbox_target.weight
        bbox_target_mean = p.bbox_target.mean
        bbox_target_std = p.bbox_target.std

        proposal = self.get_all_proposal(conv_fpn_feat, im_info)

        (bbox, label, bbox_target,
         bbox_weight) = mx.sym.Custom(proposal=proposal,
                                      gt_bbox=gt_bbox,
                                      num_class=num_reg_class,
                                      add_gt_to_proposal=not proposal_wo_gt,
                                      image_rois=image_roi,
                                      fg_fraction=fg_fraction,
                                      fg_thresh=fg_thr,
                                      bg_thresh_hi=bg_thr_hi,
                                      bg_thresh_lo=bg_thr_lo,
                                      bbox_target_std=bbox_target_std,
                                      name="subsample_proposal",
                                      op_type="bbox_target")

        label = X.reshape(label, (-3, -2))
        bbox_target = X.reshape(bbox_target, (-3, -2))
        bbox_weight = X.reshape(bbox_weight, (-3, -2))

        return bbox, label, bbox_target, bbox_weight
Example #23
    def get_roi_feature(self, conv_fpn_feat, proposal):
        p = self.p
        rcnn_stride = p.stride
        roi_canonical_scale = p.roi_canonical_scale
        roi_canonical_level = p.roi_canonical_level

        group = mx.symbol.Custom(
            op_type="assign_layer_fpn",
            rois=proposal,
            rcnn_stride=rcnn_stride,
            roi_canonical_scale=roi_canonical_scale,
            roi_canonical_level=roi_canonical_level,
            name="assign_layer_fpn"
        )
        proposal_fpn = dict()
        for i, stride in enumerate(rcnn_stride):
            proposal_fpn["stride%s" % stride] = group[i]

        if p.fp16:
            for stride in rcnn_stride:
                conv_fpn_feat["stride%s" % stride] = X.to_fp32(
                    conv_fpn_feat["stride%s" % stride],
                    name="fpn_stride%s_to_fp32"
                )

        fpn_roi_feats = list()
        for stride in rcnn_stride:
            feat_lvl = conv_fpn_feat["stride%s" % stride]
            proposal_lvl = proposal_fpn["stride%s" % stride]
            roi_feat = X.roi_align(
                feat_lvl,
                rois=proposal_lvl,
                out_size=p.out_size,
                stride=stride,
                name="roi_align"
            )
            roi_feat = X.reshape(
                data=roi_feat,
                shape=(-3, -2),
                name='roi_feat_reshape'
            )
            fpn_roi_feats.append(roi_feat)
        roi_feat = X.add_n(*fpn_roi_feats)

        if p.fp16:
            roi_feat = X.to_fp16(roi_feat, name="roi_feat_to_fp16")

        return roi_feat
Example #24
    def _get_bbox_head_logit(self, conv_feat):
        # comment this for re-infer in test stage
        # if self._head_feat is not None:
        #     return self._head_feat

        p = self.p
        stage = self.stage

        flatten = X.flatten(conv_feat, name="bbox_feat_flatten_" + stage)
        reshape = X.reshape(flatten, (0, 0, 1, 1),
                            name="bbox_feat_reshape_" + stage)

        if p.normalizer.__name__ == "fix_bn":
            fc1 = X.convrelu(reshape,
                             filter=1024,
                             weight=self.fc1_weight,
                             bias=self.fc1_bias,
                             no_bias=False,
                             name="bbox_fc1_" + stage)
            fc2 = X.convrelu(fc1,
                             filter=1024,
                             weight=self.fc2_weight,
                             bias=self.fc2_bias,
                             no_bias=False,
                             name="bbox_fc2_" + stage)
        elif p.normalizer.__name__ in ["sync_bn", "gn"]:
            fc1 = X.convnormrelu(p.normalizer,
                                 reshape,
                                 filter=1024,
                                 weight=self.fc1_weight,
                                 bias=self.fc1_bias,
                                 no_bias=False,
                                 name="bbox_fc1_" + stage)
            fc2 = X.convnormrelu(p.normalizer,
                                 fc1,
                                 filter=1024,
                                 weight=self.fc2_weight,
                                 bias=self.fc2_bias,
                                 no_bias=False,
                                 name="bbox_fc2_" + stage)
        else:
            raise NotImplementedError("Unsupported normalizer: {}".format(
                p.normalizer.__name__))

        self._head_feat = fc2

        return self._head_feat
Example #25
    def _get_bbox_head_logit(self, conv_feat):
        if self._head_feat is not None:
            return self._head_feat

        xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

        flatten = X.reshape(conv_feat, shape=(0, -1, 1, 1), name="bbox_feat_reshape")
        fc1 = X.conv(flatten, filter=1024, name="bbox_fc1", init=xavier_init)
        fc1 = self.add_norm(fc1)
        fc1 = X.relu(fc1)
        fc2 = X.conv(fc1, filter=1024, name="bbox_fc2", init=xavier_init)
        fc2 = self.add_norm(fc2)
        fc2 = X.relu(fc2)

        self._head_feat = fc2

        return self._head_feat
Example #26
    def get_roi_feature(self, rcnn_feat, proposal):
        p = self.p

        if p.fp16:
            rcnn_feat = X.to_fp32(rcnn_feat, "rcnn_feat_to_fp32")

        roi_feat = X.roi_align(rcnn_feat,
                               rois=proposal,
                               out_size=p.out_size,
                               stride=p.stride,
                               name="roi_align")

        if p.fp16:
            roi_feat = X.to_fp16(roi_feat, "roi_feat_to_fp16")

        roi_feat = X.reshape(roi_feat, (-3, -2))

        return roi_feat
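Note: `X.roi_align` is mxnext's wrapper; assuming it delegates to the stock MXNet operator, the underlying call is `mx.nd.contrib.ROIAlign`, which expects rois of shape `(num_rois, 5)` with the batch index in the first column and a `spatial_scale` of 1/stride. A minimal sketch with made-up shapes:

    import mxnet as mx

    feat = mx.nd.random.normal(shape=(1, 256, 38, 50))       # a stride-16 feature map
    rois = mx.nd.array([[0, 32, 32, 160, 128]])              # [batch_idx, x1, y1, x2, y2] in image pixels
    pooled = mx.nd.contrib.ROIAlign(feat, rois,
                                    pooled_size=(7, 7),
                                    spatial_scale=1.0 / 16)
    print(pooled.shape)  # (1, 256, 7, 7)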
Example #27
    def _convs_and_fcs(self, x, num_convs, num_fcs, name, conv_init, fc_init):
        '''
        Args:
            x: [N, C, H, W] feature maps
            num_convs: int
            num_fcs: int
            conv_init: mx initializer
            fc_init: mx initializer
        Returns:
            x: [N, C, H, W] or [N, C, 1, 1]
        '''
        if num_convs == 0 and num_fcs == 0:
            return x

        out_channels = self.p.TSD.conv_out_channels
        out_fc_channels = self.p.TSD.fc_out_channels

        if num_convs > 0:
            for i in range(num_convs):
                x = X.relu(
                    X.conv(x,
                           kernel=3,
                           filter=out_channels,
                           no_bias=False,
                           name=name + '_conv%s' % i,
                           init=conv_init))

        if num_fcs > 0:
            x = X.reshape(x,
                          shape=(0, -1, 1, 1),
                          name=name + '_conv_fc_flatten')
            for i in range(num_fcs):
                x = X.relu(
                    X.conv(x,
                           kernel=1,
                           filter=out_fc_channels,
                           no_bias=False,
                           name=name + '_fc%s' % i,
                           init=fc_init))
        return x
Example #28
    def _get_bbox_head_logit(self, conv_feat):
        #if self._head_feat is not None:
        #    return self._head_feat

        stage = self.stage

        flatten = X.flatten(conv_feat, name="bbox_feat_flatten_" + stage)
        reshape = X.reshape(flatten, (0, 0, 1, 1),
                            name="bbox_feat_reshape_" + stage)
        fc1 = X.conv(reshape,
                     filter=1024,
                     weight=self.fc1_weight,
                     name="bbox_fc1_" + stage)
        fc1_relu = X.relu(fc1, name="bbox_fc1_relu_" + stage)
        fc2 = X.conv(fc1_relu,
                     filter=1024,
                     weight=self.fc2_weight,
                     name="bbox_fc2_" + stage)
        fc2_relu = X.relu(fc2, name="bbox_fc2_relu_" + stage)

        self._head_feat = fc2_relu

        return self._head_feat
Example #29
    def get_roi_feature(self, conv_fpn_feat, proposal):
        p = self.p
        rcnn_stride = p.stride

        group = mx.symbol.Custom(rois=proposal, op_type='assign_layer_fpn')
        proposal_fpn = dict()
        proposal_fpn["stride4"] = group[1]
        proposal_fpn["stride8"] = group[2]
        proposal_fpn["stride16"] = group[3]
        proposal_fpn["stride32"] = group[4]

        if p.fp16:
            for stride in rcnn_stride:
                conv_fpn_feat["stride%s" % stride] = X.to_fp32(
                    conv_fpn_feat["stride%s" % stride],
                    name="fpn_stride%s_to_fp32")

        fpn_roi_feats = list()
        for stride in rcnn_stride:
            feat_lvl = conv_fpn_feat["stride%s" % stride]
            proposal_lvl = proposal_fpn["stride%s" % stride]
            roi_feat = X.roi_align(feat_lvl,
                                   rois=proposal_lvl,
                                   out_size=p.out_size,
                                   stride=stride,
                                   name="roi_align")
            roi_feat = X.reshape(data=roi_feat,
                                 shape=(-3, -2),
                                 name='roi_feat_reshape')
            fpn_roi_feats.append(roi_feat)
        roi_feat = X.add_n(*fpn_roi_feats)

        if p.fp16:
            roi_feat = X.to_fp16(roi_feat, name="roi_feat_to_fp16")

        return roi_feat
Example #30
    def get_all_proposal(self, conv_fpn_feat, im_info):
        if self._proposal is not None:
            return self._proposal

        p = self.p
        rpn_stride = p.anchor_generate.stride
        anchor_scale = p.anchor_generate.scale
        anchor_ratio = p.anchor_generate.ratio
        pre_nms_top_n = p.proposal.pre_nms_top_n
        post_nms_top_n = p.proposal.post_nms_top_n
        nms_thr = p.proposal.nms_thr
        min_bbox_side = p.proposal.min_bbox_side
        num_anchors = len(p.anchor_generate.ratio) * len(
            p.anchor_generate.scale)
        batch_size = p.batch_image

        cls_logit_dict, bbox_delta_dict = self.get_output(conv_fpn_feat)

        # rpn rois for multi level feature
        proposal_list = []
        proposal_scores_list = []
        for stride in rpn_stride:
            rpn_cls_logit = cls_logit_dict[stride]
            rpn_bbox_delta = bbox_delta_dict[stride]
            # ROI Proposal
            rpn_cls_logit_reshape = X.reshape(
                data=rpn_cls_logit,
                shape=(0, 2, -1, 0),
                name="rpn_cls_logit_reshape_stride%s" % stride)
            rpn_cls_score = mx.symbol.SoftmaxActivation(
                data=rpn_cls_logit_reshape,
                mode="channel",
                name="rpn_cls_score_stride%s" % stride)
            rpn_cls_score_reshape = X.reshape(
                data=rpn_cls_score,
                shape=(0, 2 * num_anchors, -1, 0),
                name="rpn_cls_score_reshape_stride%s" % stride)
            rpn_proposal, rpn_proposal_scores = mx.sym.contrib.Proposal_v3(
                cls_prob=rpn_cls_score_reshape,
                bbox_pred=rpn_bbox_delta,
                im_info=im_info,
                rpn_pre_nms_top_n=pre_nms_top_n,
                rpn_post_nms_top_n=post_nms_top_n,
                feature_stride=stride,
                output_score=True,
                scales=tuple(anchor_scale),
                ratios=tuple(anchor_ratio),
                rpn_min_size=min_bbox_side,
                threshold=nms_thr,
                iou_loss=False)

            if p.nnvm_proposal and stride < rpn_stride[-2]:
                max_side = p.anchor_generate.max_side
                assert max_side is not None, "nnvm proposal requires max_side of image"

                from mxnext.tvm.proposal import proposal as Proposal
                anchors = self.anchor_dict["stride%s" % stride]
                rpn_proposal, rpn_proposal_scores = Proposal(
                    cls_prob=rpn_cls_score_reshape,
                    bbox_pred=rpn_bbox_delta,
                    im_info=im_info,
                    anchors=anchors,
                    name='proposal',
                    feature_stride=stride,
                    scales=tuple(anchor_scale),
                    ratios=tuple(anchor_ratio),
                    rpn_pre_nms_top_n=pre_nms_top_n,
                    rpn_post_nms_top_n=post_nms_top_n,
                    threshold=nms_thr,
                    batch_size=batch_size,
                    max_side=max_side,
                    output_score=True,
                    variant="simpledet")
            proposal_list.append(rpn_proposal)
            proposal_scores_list.append(rpn_proposal_scores)

        # concat output rois of each level
        proposal_concat = X.concat(proposal_list,
                                   axis=1,
                                   name="proposal_concat")
        proposal_scores_concat = X.concat(proposal_scores_list,
                                          axis=1,
                                          name="proposal_scores_concat")

        from mxnext.tvm.get_top_proposal import get_top_proposal
        proposal = get_top_proposal(mx.symbol,
                                    bbox=proposal_concat,
                                    score=proposal_scores_concat,
                                    top_n=post_nms_top_n,
                                    batch_size=batch_size)
        self._proposal = proposal

        return proposal