Example #1
    def forward(self,
                anchors,
                objectness,
                box_regression,
                left_targets=None,
                right_targets=None):
        device = objectness[0].device
        # Reshape each level's objectness map to (N, num_anchors_level, 2) and keep
        # the foreground score for every anchor.
        scores = []
        for score in objectness:
            scores.append(
                score.permute(0, 2, 3, 1).contiguous().view(score.shape[0], -1, 2))
        scores = torch.cat(scores, 1)[:, :, 1]
        # Same reshaping for the box regression: 6 values per anchor, decoded into a
        # left/right box pair further below.
        bbox_regs = []
        for bbox_reg in box_regression:
            bbox_regs.append(
                bbox_reg.permute(0, 2, 3, 1).contiguous().view(
                    bbox_reg.shape[0], -1, 6))
        bbox_regs = torch.cat(bbox_regs, 1)
        anchors = list(zip(*anchors))
        combined_anchors = []
        batch_size = len(anchors[0])
        for i in range(batch_size):
            combined_anchors.append(
                cat_boxlist(
                    [anchors[level][i] for level in range(len(anchors))]))
        num_anchors = len(combined_anchors[0])
        # pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        # scores, topk_idx = scores.topk(min(pre_nms_top_n, scores.shape[1]), dim=1, sorted=True)

        # batch_idx = torch.arange(bsz, device=device)[:, None]
        # bbox_regs = bbox_regs[batch_idx, topk_idx]

        image_shapes = [box.size for box in combined_anchors]
        # concat_anchors = torch.cat([a.bbox for a in combined_anchors], dim=0)
        # concat_anchors = concat_anchors.reshape(bsz, -1, 4)[batch_idx, topk_idx]

        proposals = self.box_coder.decode(
            bbox_regs.view(-1, 6),
            torch.cat([a.bbox.view(-1, 4)
                       for a in combined_anchors]).to(device))

        proposals = proposals.view(batch_size, -1, 6)
        proposals_left = proposals[:, :, 0:4]
        proposals_right = proposals[:, :, [4, 1, 5, 3]]
        proposals_left = clip_boxes(proposals_left, image_shapes, batch_size)
        proposals_right = clip_boxes(proposals_right, image_shapes, batch_size)
        scores_keep = scores
        proposals_keep_left = proposals_left
        proposals_keep_right = proposals_right

        _, order = torch.sort(scores_keep, 1, True)

        left_result, right_result = [], []
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single_left = proposals_keep_left[i]
            proposals_single_right = proposals_keep_right[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if self.pre_nms_top_n > 0 and self.pre_nms_top_n < scores_keep.numel():
                order_single = order_single[:self.pre_nms_top_n]

            proposals_single_left = proposals_single_left[order_single, :]
            proposals_single_right = proposals_single_right[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            left_boxlist = BoxList(proposals_single_left,
                                   image_shapes[i],
                                   mode="xyxy")
            right_boxlist = BoxList(proposals_single_right,
                                    image_shapes[i],
                                    mode='xyxy')

            left_boxlist.add_field("objectness", scores_single.squeeze(1))
            right_boxlist.add_field("objectness", scores_single.squeeze(1))
            left_boxlist = left_boxlist.clip_to_image(remove_empty=False)
            right_boxlist = right_boxlist.clip_to_image(remove_empty=False)
            left_boxlist = remove_small_boxes(left_boxlist, self.min_size)
            right_boxlist = remove_small_boxes(right_boxlist, self.min_size)
            left_boxlist, right_boxlist = double_view_boxlist_nms(
                left_boxlist,
                right_boxlist,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field='objectness')
            left_result.append(left_boxlist)
            right_result.append(right_boxlist)
        return left_result, right_result
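
For reference, a minimal sketch (with made-up numbers) of the 6-value proposal layout that the slicing proposals[:, :, 0:4] and proposals[:, :, [4, 1, 5, 3]] assumes: each decoded row is (x1_left, y1, x2_left, y2, x1_right, x2_right), so the left and right boxes share the same vertical extent.

import torch

# Hypothetical decoded row (x1_l, y1, x2_l, y2, x1_r, x2_r); values are invented.
proposals = torch.tensor([[10., 20., 50., 80., 14., 54.]])
proposals_left = proposals[:, 0:4]            # (x1_l, y1, x2_l, y2)
proposals_right = proposals[:, [4, 1, 5, 3]]  # (x1_r, y1, x2_r, y2)
print(proposals_left)   # tensor([[10., 20., 50., 80.]])
print(proposals_right)  # tensor([[14., 20., 54., 80.]])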
Example #2
 def get_ground_truth(self, index):
     img_id = self.ids[index]
     if self.split != 'test':
         left_annotation = self.annotations['left'][int(img_id)]
         right_annotation = self.annotations['right'][int(img_id)]
         info = self.get_img_info(index)
         height, width = info['height'], info['width']
         # left target
         left_target = BoxList(left_annotation["boxes"], (width, height),
                               mode="xyxy")
         left_target.add_field("labels", left_annotation["labels"])
         left_target.add_field("alphas", left_annotation['alphas'])
         boxes_3d = Box3DList(left_annotation["boxes_3d"], (width, height),
                              mode='ry_lhwxyz')
         left_target.add_field("box3d", boxes_3d)
         left_target.add_map('disparity', self.get_disparity(index))
         left_target.add_field('masks', self.get_mask(index))
         left_target.add_field(
             'truncation', torch.tensor(self.truncations_list[int(img_id)]))
         left_target.add_field(
             'occlusion', torch.tensor(self.occlusions_list[int(img_id)]))
         left_target.add_field(
             'image_size',
             torch.tensor([[width, height]]).repeat(len(left_target), 1))
         left_target.add_field(
             'calib', Calib(self.get_calibration(index), (width, height)))
         left_target.add_field(
             'index',
             torch.full((len(left_target), 1), index, dtype=torch.long))
         left_target.add_field(
             'imgid',
             torch.full((len(left_target), 1),
                        int(img_id),
                        dtype=torch.long))
         left_target = left_target.clip_to_image(remove_empty=True)
         # right target
         right_target = BoxList(right_annotation["boxes"], (width, height),
                                mode="xyxy")
         right_target.add_field("labels", right_annotation["labels"])
         right_target.add_field("alphas", right_annotation['alphas'])
         boxes_3d = Box3DList(right_annotation["boxes_3d"], (width, height),
                              mode='ry_lhwxyz')
         right_target.add_field("box3d", boxes_3d)
         right_target = right_target.clip_to_image(remove_empty=True)
         target = {'left': left_target, 'right': right_target}
         return target
     else:
         fakebox = torch.tensor([[0, 0, 0, 0]])
         info = self.get_img_info(index)
         height, width = info['height'], info['width']
         # left target
         left_target = BoxList(fakebox, (width, height), mode="xyxy")
         left_target.add_field(
             'image_size',
             torch.tensor([[width, height]]).repeat(len(left_target), 1))
         left_target.add_field(
             'calib', Calib(self.get_calibration(index), (width, height)))
         left_target.add_field(
             'index',
             torch.full((len(left_target), 1), index, dtype=torch.long))
         left_target.add_field('masks', self.get_mask(index))
         left_target.add_map('disparity', self.get_disparity(index))
         left_target.add_field(
             'imgid',
             torch.full((len(left_target), 1),
                        int(img_id),
                        dtype=torch.long))
         # right target
         right_target = BoxList(fakebox, (width, height), mode="xyxy")
         target = {'left': left_target, 'right': right_target}
         return target
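
For context, a dataset like this one would typically consume get_ground_truth from __getitem__. A minimal sketch under assumed names (get_image and self.transforms are not shown in the snippet and are purely illustrative):

 def __getitem__(self, index):
     # Hypothetical usage sketch; get_image and self.transforms are assumptions.
     img = self.get_image(index)
     target = self.get_ground_truth(index)  # {'left': BoxList, 'right': BoxList}
     if self.transforms is not None:
         img, target = self.transforms(img, target)
     return img, target, index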
Example #3
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size (N, A * C, H, W)
            box_regression: tensor of size (N, A * 4, H, W)
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A

        # put in the same format as anchors
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        candidate_inds = box_cls > self.pre_nms_thresh

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        results = []
        for per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors in zip(
            box_cls,
            box_regression,
            pre_nms_top_n,
            candidate_inds,
            anchors):

            # Sort and select TopN
            # TODO most of this can be made out of the loop for
            # all images.
            # TODO:Yang: Not easy to do. Because the numbers of detections are
            # different in each image. Therefore, this part needs to be done
            # per image.
            per_box_cls = per_box_cls[per_candidate_inds]

            per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)

            per_candidate_nonzeros = \
                    per_candidate_inds.nonzero()[top_k_indices, :]

            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
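
The snippet relies on a permute_and_flatten helper. As a point of reference, here is a minimal sketch of what such a helper typically does, assuming the (N, A * C, H, W) layout from the docstring: it moves the spatial and anchor dimensions in front of the class dimension so the result, of shape (N, H * W * A, C), lines up with the flattened anchors.

import torch

def permute_and_flatten(layer, N, A, C, H, W):
    # (N, A*C, H, W) -> (N, A, C, H, W) -> (N, H, W, A, C) -> (N, H*W*A, C)
    layer = layer.view(N, A, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    return layer.reshape(N, -1, C)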