Exemple #1
0
    def _bbox_nms(self, refined_props_nms):
        """Non-maximum suppression on each class of refined proposals.

        Proposals are grouped by the class label stored in column 0. Each group
        is sorted by descending score and handed to ``nms``; the rows that
        survive are reported as positions into the squeezed input.

        Note:
            The batch dimension of ``refined_props_nms`` is squeezed IN PLACE,
            so the caller's tensor has shape [M, 6] after this call.

        Args:
            refined_props_nms(Tensor): [N, M, (cls, x1, y1, x2, y2, cls_score)].
                Only N == 1 is supported.

        Returns:
            keep_idx(LongTensor): keep index after NMS for final bounding box
                output. Empty when there are no proposals.
        """
        assert refined_props_nms.size(0) == 1, "batch size >=2 is not supported yet."
        refined_props_nms.squeeze_(0)

        if refined_props_nms.size(0) == 0:
            # Nothing to suppress; torch.cat() on an empty list would raise.
            return refined_props_nms.new_empty((0,), dtype=torch.long)

        nms_thresh = float(self.config['TEST']['NMS_THRESH'])
        cls_ids = refined_props_nms[:, 0]

        keep_idx = []
        # Group on plain Python floats. A 0-dim tensor used as a dict key is
        # hashed by identity, which would put every proposal into its own
        # group and turn per-class NMS into a no-op. dict.fromkeys() keeps the
        # classes in order of first appearance.
        for cls_id in dict.fromkeys(cls_ids.tolist()):
            # positions (in the squeezed input) of all proposals of this class
            pos = torch.nonzero(cls_ids == cls_id).view(-1)
            # drop the class column -> (x1, y1, x2, y2, cls_score)
            prop = refined_props_nms[pos][:, 1:]
            score = prop[:, 4]
            order = torch.sort(score, dim=0, descending=True)[1]
            pos_ordered = pos[order]
            prop_ordered = prop[order]
            keep_idx_per_cls = nms(prop_ordered, nms_thresh)
            # map NMS output (indices into the sorted group) back to input rows
            keep_idx.append(pos_ordered[keep_idx_per_cls])

        keep_idx = torch.cat(keep_idx).long()

        return keep_idx
Exemple #2
0
    def forward(self, feature_maps, gt_bboxes=None, img_shape=None):
        """Generate region proposals and RPN losses from backbone features.

        Without FPN the RPN is run once on the first feature map and its result
        is returned unchanged. With FPN the RPN is run on every pyramid level
        (swapping in that level's anchor-target and proposal layers), the
        per-level rois are concatenated, ranked by score, truncated to the
        pre-NMS budget, filtered by NMS and truncated to the post-NMS budget.
        The two losses are averaged over the pyramid levels.

        Args:
            feature_maps(Variable): [p2, p3, p4, p5, p6] or [c5], feature pyramid or single
                feature map.
            gt_bboxes(Tensor): [N, M, (x1, y1, x2, y2)].
            img_shape(Tensor): [height, width], Image shape.
        Returns:
             rois(Tensor): [N, M, (idx, score, x1, y1, x2, y2)] N: batch size, M: number of roi
                after nms, idx: bbox index in mini-batch, score: objectness of roi.
             rpn_loss_cls(Tensor): Classification loss.
             rpn_loss_bbox(Tensor): Bounding box regression loss.
        """
        num_images = feature_maps[0].size(0)
        assert num_images == 1, "batch_size > 1 will add support later."

        # Single feature map: delegate straight to the RPN.
        if not self.use_fpn:
            rois, rpn_loss_cls, rpn_loss_bbox = self.rpn(
                feature_maps[0], img_shape, gt_bboxes, None)
            return rois, rpn_loss_cls, rpn_loss_bbox

        # Proposal budgets and NMS threshold differ between train and test.
        fpn_cfg = self.config['FPN']
        if self.training:
            pre_nms_top_n = int(fpn_cfg['TRAIN_FPN_PRE_NMS_TOP_N'])
            post_nms_top_n = int(fpn_cfg['TRAIN_FPN_POST_NMS_TOP_N'])
            nms_thresh = float(fpn_cfg['TRAIN_FPN_NMS_THRESH'])
        else:
            pre_nms_top_n = int(fpn_cfg['TEST_FPN_PRE_NMS_TOP_N'])
            post_nms_top_n = int(fpn_cfg['TEST_FPN_POST_NMS_TOP_N'])
            nms_thresh = float(fpn_cfg['TEST_FPN_NMS_THRESH'])

        num_levels = len(feature_maps)
        level_rois = []
        rpn_loss_cls = 0
        rpn_loss_bbox = 0
        for level, feature in enumerate(feature_maps):
            # The RPN head is shared; only the level-specific layers change.
            self.rpn.RPN_anchor_target = self.RPN_anchor_targets[level]
            self.rpn.RPN_proposal = self.RPN_proposals[level]
            roi_single, loss_cls_single, loss_bbox_single = self.rpn(
                feature, img_shape, gt_bboxes, None)
            level_rois.append(roi_single)
            rpn_loss_cls += loss_cls_single
            rpn_loss_bbox += loss_bbox_single

        # [N, M, (n, score, x1, y1, x2, y2)] over all pyramid levels.
        merged_rois = torch.cat(level_rois, 1)

        # Rank by objectness and keep the best pre_nms_top_n candidates.
        _, ranking = torch.sort(merged_rois[0, :, 1], dim=0, descending=True)
        top_rois = merged_rois[:, ranking, :][:, :pre_nms_top_n, :]

        # nms() receives rows laid out as (x1, y1, x2, y2, score).
        top_scores = top_rois[0, :, 1].unsqueeze(-1)
        top_boxes = top_rois[0, :, 2:]
        survivors = nms(torch.cat([top_boxes, top_scores], 1), nms_thresh)
        survivors = survivors[:post_nms_top_n]

        rois = torch.cat([top_rois[:, k, :] for k in survivors]).unsqueeze(0)

        # Average the accumulated losses over the pyramid levels.
        rpn_loss_cls /= num_levels
        rpn_loss_bbox /= num_levels
        return rois, rpn_loss_cls, rpn_loss_bbox
Exemple #3
0
    def forward(self, input):
        """Convert RPN outputs into a fixed-size, zero-padded set of scored proposals.

        Algorithm:
            1. generate A anchor boxes centred on every (H, W) feature-map cell
               and apply the predicted bbox deltas to them
            2. clip the predicted boxes to the image
            3. (eval only) drop proposals whose score is not above 0.05; if none
               pass, all proposals are kept
            4. sort all (proposal, score) pairs by score, highest first
            5. take the top pre_nms_topN proposals
            6. apply NMS with the configured threshold
            7. take the top post_nms_topN proposals after NMS

        Filtering by minimum box size is not performed by this method.

        Args:
            input: sequence ``(scores, bbox_deltas, im_info, cfg_key)``.
                scores: 4-D probability map; only the channels from
                    ``self._num_anchors`` onwards (the foreground
                    probabilities) are used.
                bbox_deltas: 4-D map of predicted box deltas; it is reshaped
                    here to (batch, H * W * A, 4).
                im_info: image information forwarded to ``clip_boxes``.
                cfg_key: key into the global ``cfg`` selecting the RPN_*
                    settings (presumably 'TRAIN' / 'TEST' -- confirm against
                    the caller).

        Returns:
            Tensor of shape (batch_size, post_nms_topN, 6). Column 0 is the
            image index within the batch, column 1 the score and columns 2-5
            the proposal box. Rows beyond the number of kept proposals have
            zeros in the score and box columns.
        """
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

        batch_size = bbox_deltas.size(0)

        # One (x, y, x, y) shift per feature-map cell, in input-image pixels.
        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                             shift_x.ravel(), shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        # Broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4): every base anchor at
        # every cell, then share the same anchors across the batch.
        self._anchors = self._anchors.type_as(scores)
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # 1. convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)

        scores_keep = scores
        proposals_keep = proposals

        if not self.training:
            # 3. filter out scores below threshold
            assert batch_size == 1
            # Search the single image's 1-D score row. nonzero() on the 2-D
            # (1, N) tensor returns (row, col) pairs, and flattening those
            # would interleave the row index 0 with the real column indices,
            # selecting proposal 0 once per kept proposal.
            scores_keep_idx = torch.nonzero(scores_keep[0] > 0.05).view(-1)
            if scores_keep_idx.numel() != 0:
                scores_keep = scores_keep[:, scores_keep_idx]
                proposals_keep = proposals_keep[:, scores_keep_idx]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 6).zero_()
        for i in range(batch_size):
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]
            order_single = order[i]

            # 5. take top pre_nms_topN (e.g. 6000)
            if 0 < pre_nms_topN < scores_single.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7) on rows of (box, score)
            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # 7. take post_nms_topN (e.g. 300)
            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :].view(-1)

            # Write the kept proposals; remaining rows stay zero-padded.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1] = scores_single
            output[i, :num_proposal, 2:] = proposals_single

        return output