Example #1
def test_bbox_coders():
    coder_config = {'type': constants.KEY_ORIENTS}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset()
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])

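    # stack three copies of the single sample to fake a batch of size 3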
    label_boxes_3d = torch.stack(3 * [label_boxes_3d[:num_instances]], dim=0)
    proposals = torch.stack(3 * [proposals[:num_instances]], dim=0)
    p2 = torch.stack(3 * [p2], dim=0)
    orients = bbox_coder.encode_batch(label_boxes_3d, proposals, p2)
    print(orients.shape)
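    # expand the orientation class index (column 0) into a one-hot pair so the
    # input to decode_batch has the layout of a raw classification output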
    encoded_cls_orients = torch.zeros_like(orients[:, :, :2])
    cls_orients = orients[:, :, :1].long()
    row = torch.arange(0, cls_orients.numel()).type_as(cls_orients)
    encoded_cls_orients.view(-1, 2)[row, cls_orients.view(-1)] = 1
    encoded_orients = torch.cat([encoded_cls_orients, orients[:, :, 1:]],
                                dim=-1)

    ry = bbox_coder.decode_batch(encoded_orients, proposals, proposals, p2)
    print(ry)
    print(label_boxes_3d[:, :, -1])
    print(sample[constants.KEY_IMAGE_PATH])
Example #2
    def __init__(self, assigner_config):

        # composed helpers: similarity metric, box coder, and proposal matcher
        self.similarity_calc = similarity_calcs.build(
            assigner_config['similarity_calc_config'])
        self.bbox_coder = bbox_coders.build(assigner_config['coder_config'])
        self.matcher = matchers.build(assigner_config['matcher_config'])

        self.fg_thresh = assigner_config['fg_thresh']
        self.bg_thresh = assigner_config['bg_thresh']
Example #3
def test_corners_3d_coder():

    coder_config = {'type': constants.KEY_CORNERS_3D}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset()
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    label_boxes_2d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])

    # ry = compute_ray_angle(label_boxes_3d[:, :3])
    # label_boxes_3d[:, -1] += ry

    label_boxes_3d = torch.stack(1 * [label_boxes_3d[:num_instances]], dim=0)
    label_boxes_2d = torch.stack(1 * [label_boxes_2d[:num_instances]], dim=0)
    proposals = torch.stack(1 * [proposals[:num_instances]], dim=0)
    p2 = torch.stack(1 * [p2], dim=0)

    # label_boxes_3d[:, :, -1] = 0

    encoded_corners_3d = bbox_coder.encode_batch(label_boxes_3d,
                                                 label_boxes_2d, p2)
    num_boxes = encoded_corners_3d.shape[1]
    batch_size = encoded_corners_3d.shape[0]

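    # round trip: decoding the encoded targets should recover the 3D corners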
    decoded_corners_3d = bbox_coder.decode_batch(
        encoded_corners_3d.view(batch_size, num_boxes, -1), proposals, p2)

    decoded_corners_2d = geometry_utils.torch_points_3d_to_points_2d(
        decoded_corners_3d[0].view(-1, 3), p2[0]).view(-1, 8, 2)
    decoded_corners_2d = decoded_corners_2d.cpu().detach().numpy()

    image_path = sample[constants.KEY_IMAGE_PATH]
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=False,
                                 save_dir=save_dir)
    visualizer.render_image_corners_2d(image_path, decoded_corners_2d)
Example #4
def test_orient_coder():
    coder_config = {'type': constants.KEY_ORIENTS}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset()
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])

    orients = bbox_coder.encode_batch(label_boxes_3d, proposals, p2)
    print(orients.shape)
Example #5
def test_orientv3_coder():
    coder_config = {'type': constants.KEY_ORIENTS_V3}
    orient_coder = bbox_coders.build(coder_config)

    dataset = build_dataset()
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])

    label_boxes_3d = torch.stack(3 * [label_boxes_3d[:num_instances]], dim=0)
    orients = orient_coder.encode_batch(label_boxes_3d)
    print(orients)
Example #6
    def assign_target(cls, **kwargs):
        match = kwargs[constants.KEY_MATCH]
        gt = kwargs[constants.KEY_BOXES_3D]
        assigned_gt = cls.generate_assigned_label(cls,
                                                  kwargs[constants.KEY_MATCH],
                                                  gt)

        coder = bbox_coders.build({'type': constants.KEY_ORIENTS_V3})
        reg_targets_batch = coder.encode_batch(assigned_gt)
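        # zero out targets of unmatched (background) proposals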
        reg_targets_batch[match == -1] = 0
        # no grad_fn needed
        return reg_targets_batch
Example #7
    def assign_target(cls, **kwargs):
        match = kwargs[constants.KEY_MATCH]
        gt = kwargs[constants.KEY_BOXES_3D]
        assigned_gt = cls.generate_assigned_label(cls,
                                                  kwargs[constants.KEY_MATCH],
                                                  gt)
        proposals = kwargs[constants.KEY_PROPOSALS]
        p2 = kwargs[constants.KEY_STEREO_CALIB_P2]

        coder = bbox_coders.build({'type': constants.KEY_REAR_SIDE})
        reg_targets_batch = coder.encode_batch(assigned_gt, proposals, p2)
        reg_targets_batch[match == -1] = 0
        # no grad_fn needed
        return reg_targets_batch
Example #8
    def assign_target(cls, **kwargs):
        # match = kwargs[constants.KEY_MATCH]
        label_boxes_3d = kwargs[constants.KEY_BOXES_3D]
        p2 = kwargs[constants.KEY_STEREO_CALIB_P2]

        # prepare coder
        # 2d coder config
        coder = bbox_coders.build({'type': constants.KEY_CORNERS_3D_GRNET})

        reg_targets_batch = coder.encode_batch(label_boxes_3d, p2)
        reg_targets_batch = cls.generate_assigned_label(
            cls, kwargs[constants.KEY_MATCH], reg_targets_batch)

        # reg_targets_batch[match == -1] = 0
        # no grad_fn needed
        return reg_targets_batch
Example #9
    def assign_target(cls, **kwargs):
        match = kwargs[constants.KEY_MATCH]
        gt = kwargs[constants.KEY_BOXES_2D]
        proposals = kwargs[constants.KEY_PROPOSALS]
        assigned_gt = cls.generate_assigned_label(cls,
                                                  kwargs[constants.KEY_MATCH],
                                                  gt)
        # prepare coder
        # 2d coder config
        bbox_coder_config = kwargs[
            constants.KEY_TARGET_GENERATOR_CONFIG]['coder_config']
        coder = bbox_coders.build(bbox_coder_config)
        reg_targets_batch = coder.encode_batch(proposals, assigned_gt)
        reg_targets_batch[match == -1] = 0
        # no grad_fn needed
        return reg_targets_batch
Example #10
    def assign_target(cls, **kwargs):
        match = kwargs[constants.KEY_MATCH]
        # label_boxes_2d = kwargs[constants.KEY_BOXES_2D]
        proposals = kwargs[constants.KEY_PROPOSALS]
        label_boxes_3d = kwargs[constants.KEY_BOXES_3D]
        p2 = kwargs[constants.KEY_STEREO_CALIB_P2]

        # prepare coder
        # 2d coder config
        coder = bbox_coders.build({'type': constants.KEY_CORNERS_3D})
        label_boxes_3d = cls.generate_assigned_label(
            cls, kwargs[constants.KEY_MATCH], label_boxes_3d)
        reg_targets_batch = coder.encode_batch(label_boxes_3d, proposals, p2)

        reg_targets_batch[match == -1] = 0
        # no grad_fn needed
        return reg_targets_batch
Example #11
    def assign_target(cls, **kwargs):
        match = kwargs[constants.KEY_MATCH]
        keypoints = kwargs[constants.KEY_KEYPOINTS]

        # prepare coder
        # 2d coder config
        coder = bbox_coders.build({'type': constants.KEY_KEYPOINTS_HEATMAP})
        proposals = kwargs[constants.KEY_PROPOSALS]

        # assign label keypoints first
        assigned_keypoints = cls.generate_assigned_label(
            cls, kwargs[constants.KEY_MATCH], keypoints)
        reg_targets_batch = coder.encode_batch(proposals, assigned_keypoints)

        reg_targets_batch[match == -1] = 0
        # no grad_fn needed
        return reg_targets_batch
Example #12
    def assign_target(cls, **kwargs):
        match = kwargs[constants.KEY_MATCH]
        gt = kwargs[constants.KEY_BOXES_3D][:, :, 3:6]
        label_classes = kwargs[constants.KEY_CLASSES]
        mean_dims = kwargs[constants.KEY_MEAN_DIMS]
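        # prepend a zero "background" row so class indices select rows of mean_dims directly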
        bg_dim = torch.zeros_like(mean_dims[:, -1:, :])
        mean_dims = torch.cat([bg_dim, mean_dims], dim=1)
        mean_dims = cls.generate_assigned_label(cls, label_classes.long(),
                                                mean_dims)

        # prepare coder
        coder = bbox_coders.build({'type': constants.KEY_DIMS})
        reg_targets_batch = coder.encode_batch(gt, mean_dims)

        reg_targets_batch = cls.generate_assigned_label(
            cls, kwargs[constants.KEY_MATCH], reg_targets_batch)

        reg_targets_batch[match == -1] = 0
        # no grad_fn needed
        return reg_targets_batch
Example #13
    def encode_orig(self, boxes, classes, threshold=0.5):
        default_boxes = self.default_boxes
        # wh = default_boxes[:, 2:]
        default_boxes = torch.cat([
            default_boxes[:, :2] - default_boxes[:, 2:] / 2,
            default_boxes[:, :2] + default_boxes[:, 2:] / 2
        ], 1)  # xmin, ymin, xmax, ymax

        # iou = self.iou(boxes, default_boxes)  # [#obj,8732]
        similarity_calc = similarity_calcs.build({'type': 'center'})
        iou = similarity_calc.compare_batch(
            boxes.unsqueeze(0), default_boxes.unsqueeze(0)).squeeze(0)

        max_iou, max_anchor = iou.max(1)
        iou, max_idx = iou.max(0)  # [1,8732]
        max_idx.squeeze_(0)  # [8732,]
        iou.squeeze_(0)  # [8732,]

        boxes = boxes[max_idx]  # [8732,4]
        # variances = [0.1, 0.2]
        # xymin = (boxes[:, :2] - default_boxes[:, :2]) / (variances[0] * wh)
        # xymax = (boxes[:, 2:] - default_boxes[:, 2:]) / (variances[0] * wh)
        # loc = torch.cat([xymin, xymax], 1)  # [8732,4]
        coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
        loc = coder.encode_batch(default_boxes.unsqueeze(0),
                                 boxes.unsqueeze(0)).squeeze(0)

        neg = (iou < 0.4)
        ignore = (iou < threshold)
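        # objectness target: 1 = fg (iou >= threshold), -1 = ignore, 0 = bg (iou < 0.4)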
        os = torch.ones(iou.size()).long()
        os[ignore] = -1
        os[neg] = 0

        neg = (iou < 0.3)
        neg[max_anchor] = 0
        ignore[max_anchor] = 0
        conf = classes[max_idx]  # [8732,], background class = 0
        conf[ignore] = -1  # ignore band; overwritten to bg below where iou < 0.3
        conf[neg] = 0  # background

        return loc, conf, os, max_idx
Example #14
    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        # TODO create a new Function
        """
        Args:
        rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
        rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
        anchors: FloatTensor,shape(N,4,H,W)

        Returns:
        proposals_batch: FloatTensor, shape(N,post_nms_topN,4)
        fg_probs_batch: FloatTensor, shape(N,post_nms_topN)
        """
        # do not backward
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]

        coders = bbox_coders.build(
            self.target_generators.target_generator_config['coder_config'])
        proposals = coders.decode_batch(rpn_bbox_preds, anchors)

        # filter and clip
        proposals = box_ops.clip_boxes(proposals, im_info)

        # fg prob
        fg_probs = rpn_cls_probs[:, :, 1]

        # sort fg
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)

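        # placeholder order tensor; unlike Example #22, this variant skips NMS and top-N selection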
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        return proposals, proposals_order
Example #15
def test_mobileye_coder():

    coder_config = {'type': constants.KEY_MOBILEYE}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset('kitti')
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=False,
                                 save_dir=save_dir)
    for sample in dataset:
        label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
        label_boxes_2d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
        p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
        proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
        num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])
        image_info = torch.from_numpy(sample[constants.KEY_IMAGE_INFO])

        label_boxes_3d = torch.stack(1 * [label_boxes_3d[:num_instances]],
                                     dim=0)
        label_boxes_2d = torch.stack(1 * [label_boxes_2d[:num_instances]],
                                     dim=0)
        proposals = torch.stack(1 * [proposals[:num_instances]], dim=0)
        image_info = torch.stack(1 * [image_info], dim=0)
        p2 = torch.stack(1 * [p2], dim=0)

        encoded_corners_2d = bbox_coder.encode_batch(label_boxes_3d,
                                                     label_boxes_2d, p2,
                                                     image_info,
                                                     label_boxes_2d)

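        # decode back to 2D corners and visualize them to sanity-check the coder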
        decoded_corners_2d = bbox_coder.decode_batch(encoded_corners_2d,
                                                     proposals)

        decoded_corners_2d = decoded_corners_2d.cpu().detach().numpy()

        image_path = sample[constants.KEY_IMAGE_PATH]
        visualizer.render_image_corners_2d(image_path,
                                           corners_2d=decoded_corners_2d[0],
                                           p2=p2[0])
Example #16
    def forward(self, feed_dict):

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']
        multi_stage_loss_units = []
        multi_stage_stats = []
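        # cascade: each stage refines the proposals decoded from the previous stage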
        for i in range(self.num_stages):

            if self.training:
                proposals_dict = {}
                proposals_dict[constants.KEY_PRIMARY] = proposals

                # gt_dict
                gt_dict = {}
                gt_dict[constants.KEY_PRIMARY] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                gt_dict[constants.KEY_CLASSES] = None
                gt_dict[constants.KEY_BOXES_2D] = None

                # auxiliary_dict (used for encoding)
                auxiliary_dict = {}
                auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                    constants.KEY_LABEL_CLASSES]
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]
                auxiliary_dict[constants.KEY_PROPOSALS] = proposals

                proposals_dict, loss_units, stats = self.target_generators[
                    i].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)

                # note: base_feat is (N, C, H, W); rois are (N, num_proposals, 5)
                proposals = proposals_dict[constants.KEY_PRIMARY]
            rois = box_ops.box2rois(proposals)
            pooled_feat = self.rcnn_pooling(base_feat, rois.view(-1, 5))

            # shape(N,C,1,1)
            pooled_feat = self.feature_extractor.second_stage_feature(
                pooled_feat)
            pooled_feat = pooled_feat.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
            rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)

            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            batch_size = rois.shape[0]
            rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                                   self.n_classes)
            rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                                 self.n_classes)
            if not self.class_agnostic:
                if self.training:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1))
                else:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1))

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)

            if self.training:
                loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
                loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
                multi_stage_loss_units.extend([
                    loss_units[constants.KEY_CLASSES],
                    loss_units[constants.KEY_BOXES_2D]
                ])
                multi_stage_stats.append(stats)

            # decode for next stage
            coder = bbox_coders.build(self.target_generators[i].
                                      target_generator_config['coder_config'])
            proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

        if self.training:
            prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            prediction_dict[constants.KEY_STATS] = multi_stage_stats
        else:
            prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
            prediction_dict[constants.KEY_BOXES_2D] = proposals

        if self.training:
            loss_dict = self.loss(prediction_dict, feed_dict)
            return prediction_dict, loss_dict
        else:
            return prediction_dict
Example #17
def test_keypoint_hm_coder():
    coder_config = {'type': constants.KEY_KEYPOINTS_HEATMAP}
    bbox_coder = bbox_coders.build(coder_config)

    dataset = build_dataset(dataset_type='keypoint_kitti')
    sample = dataset[0]
    label_boxes_3d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_3D])
    label_boxes_2d = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    p2 = torch.from_numpy(sample[constants.KEY_STEREO_CALIB_P2])
    proposals = torch.from_numpy(sample[constants.KEY_LABEL_BOXES_2D])
    num_instances = torch.from_numpy(sample[constants.KEY_NUM_INSTANCES])
    keypoints = torch.from_numpy(sample[constants.KEY_KEYPOINTS])

    # ry = compute_ray_angle(label_boxes_3d[:, :3])
    # label_boxes_3d[:, -1] += ry

    label_boxes_3d = torch.stack(1 * [label_boxes_3d[:num_instances]], dim=0)
    label_boxes_2d = torch.stack(1 * [label_boxes_2d[:num_instances]], dim=0)
    proposals = torch.stack(1 * [proposals[:num_instances]], dim=0)
    keypoints = torch.stack(1 * [keypoints[:num_instances]], dim=0)
    p2 = torch.stack(1 * [p2], dim=0)

    # label_boxes_3d[:, :, -1] = 0

    encoded_corners_3d = bbox_coder.encode_batch(proposals, keypoints)
    num_boxes = encoded_corners_3d.shape[1]
    batch_size = encoded_corners_3d.shape[0]

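    # strip the trailing channel of each keypoint; only the heatmap part is decoded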
    keypoint_heatmap = encoded_corners_3d.view(batch_size, num_boxes, 8,
                                               -1)[..., :-1]

    # reshape before decode
    keypoint_heatmap = keypoint_heatmap.contiguous().view(
        batch_size, num_boxes, -1)

    decoded_corners_2d = bbox_coder.decode_batch(proposals, keypoint_heatmap)

    decoded_corners_2d = decoded_corners_2d.cpu().detach().numpy()

    image_path = sample[constants.KEY_IMAGE_PATH]
    image_dir = '/data/object/training/image_2'
    result_dir = './results/data'
    save_dir = 'results/images'
    calib_dir = '/data/object/training/calib'
    label_dir = None
    calib_file = None
    visualizer = ImageVisualizer(image_dir,
                                 result_dir,
                                 label_dir=label_dir,
                                 calib_dir=calib_dir,
                                 calib_file=calib_file,
                                 online=False,
                                 save_dir=save_dir)
    visualizer.render_image_corners_2d(image_path,
                                       corners_2d=decoded_corners_2d[0])
Example #18
    def __init__(self, config):
        self.coder = bbox_coders.build(config['coder_config'])
Example #19
    def forward(self, feed_dict):
        features = self.feature_extractor(feed_dict[constants.KEY_IMAGE])
        y_locs1 = []
        y_locs2 = []
        y_os = []
        y_cls = []

        for i, x in enumerate(features):
            # location out
            loc_feature = self.loc_feature1(x)
            loc1 = self.box_out1(loc_feature)

            N = loc1.size(0)
            loc1 = loc1.permute(0, 2, 3, 1).contiguous()
            loc1 = loc1.view(N, -1, self.num_regress)
            y_locs1.append(loc1)

            loc_feature = torch.cat([x, loc_feature], dim=1)
            loc_feature = self.loc_feature2(loc_feature)
            loc2 = self.box_out2(loc_feature)

            N = loc2.size(0)
            loc2 = loc2.permute(0, 2, 3, 1).contiguous()
            loc2 = loc2.view(N, -1, self.num_regress)
            loc2 += loc1
            y_locs2.append(loc2)

            # os out
            cls_feature = self.cls_feature1(x)
            os_out = self.os_out(cls_feature)
            os_out = os_out.permute(0, 2, 3, 1).contiguous()
            # _size = os_out.size(1)
            os_out = os_out.view(N, -1, 2)
            y_os.append(os_out)

            cls_feature = torch.cat([x, cls_feature], dim=1)
            cls_feature = self.cls_feature2(cls_feature)
            cls_out = self.cls_out(cls_feature)

            cls_out = cls_out.permute(0, 2, 3, 1).contiguous()
            cls_out = cls_out.view(N, -1, self.num_classes)
            y_cls.append(cls_out)

        loc1_preds = torch.cat(y_locs1, dim=1)
        loc2_preds = torch.cat(y_locs2, dim=1)
        os_preds = torch.cat(y_os, dim=1)
        cls_preds = torch.cat(y_cls, dim=1)

        image_info = feed_dict[constants.KEY_IMAGE_INFO]

        batch_size = loc1_preds.shape[0]

        anchors = self.anchors.cuda()
        anchors = anchors.repeat(batch_size, 1, 1)

        coder = bbox_coders.build(
            self.target_generators.target_generator_config['coder_config'])
        proposals = coder.decode_batch(loc2_preds, anchors).detach()

        cls_probs = F.softmax(cls_preds.detach(), dim=-1)
        os_probs = F.softmax(os_preds.detach(), dim=-1)[:, :, 1:]
        os_probs[os_probs <= 0.4] = 0
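        # gate class probabilities with objectness; anchors at or below 0.4 objectness are suppressed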
        final_probs = cls_probs * os_probs
        image_info = feed_dict[constants.KEY_IMAGE_INFO].unsqueeze(
            -1).unsqueeze(-1)

        prediction_dict = {}
        if self.training:

            # anchors = prediction_dict['anchors']
            anchors_dict = {}
            anchors_dict[constants.KEY_PRIMARY] = anchors
            anchors_dict[constants.KEY_BOXES_2D] = loc1_preds
            anchors_dict[constants.KEY_BOXES_2D_REFINE] = loc2_preds
            anchors_dict[constants.KEY_CLASSES] = cls_preds
            anchors_dict[constants.KEY_OBJECTNESS] = os_preds
            # anchors_dict[constants.KEY_FINAL_PROBS] = final_probs

            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.
                                                       KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_OBJECTNESS] = None
            gt_dict[constants.KEY_BOXES_2D_REFINE] = None

            auxiliary_dict = {}
            label_boxes_2d = feed_dict[constants.KEY_LABEL_BOXES_2D]
            if self.normlize_anchor:
                label_boxes_2d[:, :, ::2] = label_boxes_2d[:, :, ::2] / image_info[:, 1]
                label_boxes_2d[:, :, 1::2] = label_boxes_2d[:, :, 1::2] / image_info[:, 0]
            auxiliary_dict[constants.KEY_BOXES_2D] = label_boxes_2d
            gt_labels = feed_dict[constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_CLASSES] = gt_labels
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = anchors

            proposals_dict, targets, stats = self.target_generators.generate_targets(
                anchors_dict, gt_dict, auxiliary_dict, subsample=False)

            # recall
            anchors_dict[constants.KEY_PRIMARY] = proposals
            _, _, second_stage_stats = self.target_generators.generate_targets(
                anchors_dict, gt_dict, auxiliary_dict, subsample=False)

            # precision
            fg_probs, _ = final_probs[:, :, 1:].max(dim=-1)
            fake_match = auxiliary_dict[constants.KEY_FAKE_MATCH]
            second_stage_stats.update(
                Analyzer.analyze_precision(
                    fake_match,
                    fg_probs,
                    feed_dict[constants.KEY_NUM_INSTANCES],
                    thresh=0.3))

            prediction_dict[constants.KEY_STATS] = [stats, second_stage_stats]
            prediction_dict[constants.KEY_TARGETS] = targets
        else:

            prediction_dict[constants.KEY_CLASSES] = final_probs
            # prediction_dict[constants.KEY_OBJECTNESS] = os_preds

            proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3]
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2]
            prediction_dict[constants.KEY_BOXES_2D] = proposals
        if self.training:
            loss_dict = self.loss(prediction_dict, feed_dict)
            return prediction_dict, loss_dict
        else:
            return prediction_dict
Example #20
    def forward(self, feed_dict):
        self.target_assigner.bbox_coder_3d.mean_dims = feed_dict[
            constants.KEY_MEAN_DIMS]
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        second_pooled_feat = self.feature_extractor.second_stage_feature(
            pooled_feat)

        second_pooled_feat = second_pooled_feat.mean(3).mean(2)

        rcnn_cls_scores = self.rcnn_cls_preds(second_pooled_feat)
        rcnn_bbox_preds = self.rcnn_bbox_preds(second_pooled_feat)
        rcnn_3d = self.rcnn_3d_pred(second_pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        ###################################
        # 3d training
        ###################################

        prediction_dict['rcnn_3d'] = rcnn_3d

        batch_size = feed_dict[constants.KEY_IMAGE].shape[0]
        # coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
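        # decode 2D box deltas against the RoIs to obtain refined proposals in image space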
        coder = self.target_assigner.bbox_coder
        proposals = coder.decode_batch(rcnn_bbox_preds.view(batch_size, -1, 4),
                                       rois_batch[0, :, 1:]).detach()

        if not self.training:

            new_rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_bbox(
                rcnn_3d, rois_batch[0])

            # prediction_dict['rcnn_3d'] = rcnn_3d
            prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs.view(
                batch_size, -1, self.n_classes)

            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
            prediction_dict[constants.KEY_BOXES_2D] = proposals

            dims = self.squeeze_bbox_preds(
                new_rcnn_3d[:, :-3].contiguous(),
                rcnn_cls_probs.argmax(dim=-1).view(-1),
                out_c=3).view(batch_size, -1, 3)
            rcnn_orient_preds = rcnn_3d[:, 3:].view(batch_size, -1, 5)
            coder = bbox_coders.build({'type': constants.KEY_ORIENTS_V2})
            orients = coder.decode_batch(
                rcnn_orient_preds, proposals,
                feed_dict[constants.KEY_STEREO_CALIB_P2_ORIG]).detach()
            prediction_dict[constants.KEY_DIMS] = dims
            prediction_dict[constants.KEY_ORIENTS_V2] = orients
            prediction_dict['rcnn_3d'] = torch.cat(
                [dims, new_rcnn_3d[:, -3:].view(batch_size, -1, 3)], dim=-1)

        return prediction_dict
Example #21
    def forward(self, feed_dict):
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']
        multi_stage_loss_units = []
        multi_stage_stats = []
        for i in range(self.num_stages):

            if self.training:
                # proposals_dict
                proposals_dict = {}
                proposals_dict[constants.KEY_PRIMARY] = proposals

                # gt_dict
                gt_dict = {}
                gt_dict[constants.KEY_PRIMARY] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                gt_dict[constants.KEY_CLASSES] = None
                gt_dict[constants.KEY_BOXES_2D] = None
                gt_dict[constants.KEY_MOBILEYE] = None
                gt_dict[constants.KEY_DIMS] = None

                # auxiliary_dict (used for encoding)
                auxiliary_dict = {}
                auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                    constants.KEY_STEREO_CALIB_P2]
                auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                    constants.KEY_LABEL_CLASSES]
                auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                    constants.KEY_LABEL_BOXES_3D]
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]
                auxiliary_dict[constants.KEY_PROPOSALS] = proposals
                auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                    constants.KEY_MEAN_DIMS]
                auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                    constants.KEY_IMAGE_INFO]

                proposals_dict, loss_units, stats = self.target_generators[
                    i].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)

                # note: rcnn_feat_maps are pyramid features; rois are (N, num_proposals, 5)
                proposals = proposals_dict[constants.KEY_PRIMARY]
            rois = box_ops.box2rois(proposals)
            pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                    rois.view(-1, 5),
                                                    im_info[0][:2])

            # shape(N,C,1,1)
            pooled_feat_for_corners = self.feature_extractor.second_stage_feature(
                pooled_feat)
            keypoint_map = self.keypoint_predictor(pooled_feat)
            keypoint_map = keypoint_map.mean(-2)

            pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
            rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
            rcnn_corners_preds = self.rcnn_corners_preds[i](
                pooled_feat_for_corners)
            rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            batch_size = rois.shape[0]
            rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                                   self.n_classes)
            rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                                 self.n_classes)
            if not self.class_agnostic:
                if self.training:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1))

                else:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1))

            # not class_agnostic for dims
            if not self.class_agnostic_3d:
                if self.training:
                    rcnn_dim_preds = self.squeeze_bbox_preds(
                        rcnn_dim_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1),
                        out_c=3)
                else:
                    rcnn_dim_preds = self.squeeze_bbox_preds(
                        rcnn_dim_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1),
                        out_c=3)

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
            rcnn_corners_preds = rcnn_corners_preds.view(
                batch_size, rcnn_bbox_preds.shape[1], -1)

            rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

            # decode dims (used both for the loss units and the eval outputs)

            coder = bbox_coders.build({'type': constants.KEY_DIMS})
            rcnn_dim_preds = coder.decode_batch(
                rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
                rcnn_cls_probs).detach()
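            # rcnn_dim_preds now hold absolute sizes recovered from offsets to the per-class means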


            if self.training:
                loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
                loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
                loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
                loss_units[constants.KEY_MOBILEYE]['pred'] = rcnn_corners_preds
                multi_stage_loss_units.append([
                    loss_units[constants.KEY_CLASSES],
                    loss_units[constants.KEY_BOXES_2D],
                    loss_units[constants.KEY_MOBILEYE],
                    loss_units[constants.KEY_DIMS]
                ])
                multi_stage_stats.append(stats)
        coder = bbox_coders.build({'type': constants.KEY_MOBILEYE})
        rcnn_corners_preds = coder.decode_batch(rcnn_corners_preds.detach(),
                                                proposals,
                                                keypoint_map.detach())
        prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
        prediction_dict[constants.KEY_KEYPOINTS_HEATMAP] = keypoint_map
        if self.training:
            prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            prediction_dict[constants.KEY_STATS] = multi_stage_stats
        else:
            prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs
            coder = bbox_coders.build(self.target_generators[i].
                                      target_generator_config['coder_config'])
            proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()

            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)

            rcnn_corners_preds[:, :, :, 0] = rcnn_corners_preds[:, :, :, 0] / image_info[:, None, None, 3]
            rcnn_corners_preds[:, :, :, 1] = rcnn_corners_preds[:, :, :, 1] / image_info[:, None, None, 2]
            prediction_dict[constants.KEY_BOXES_2D] = proposals
            # prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds
            # prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds

        if self.training:
            loss_dict = self.loss(prediction_dict, feed_dict)
            return prediction_dict, loss_dict
        else:
            return prediction_dict
Example #22
    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        # TODO create a new Function
        """
        Args:
        rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
        rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
        anchors: FloatTensor,shape(N,4,H,W)

        Returns:
        proposals_batch: FloatTensor, shape(N,post_nms_topN,4)
        fg_probs_batch: FloatTensor, shape(N,post_nms_topN)
        """
        # do not backward
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]

        coders = bbox_coders.build(
            self.target_generators.target_generator_config['coder_config'])
        proposals = coders.decode_batch(rpn_bbox_preds, anchors)

        # filter and clip
        proposals = box_ops.clip_boxes(proposals, im_info)

        # fg prob
        fg_probs = rpn_cls_probs[:, :, 1]

        # sort fg
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)

        # fg_probs_batch = torch.zeros(batch_size,
        # self.post_nms_topN).type_as(rpn_cls_probs)
        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(rpn_bbox_preds)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]
            # pre nms
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # nms
            keep_idx_i = nms(proposals_single, fg_probs_single,
                             self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post nms
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_probs_single = fg_probs_single[keep_idx_i]
            fg_order_single = fg_order_single[keep_idx_i]

            # pad with zeros at the end
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            # fg_probs_batch[i, :num_proposal] = fg_probs_single
            proposals_order[i, :num_proposal] = fg_order_single
        return proposals_batch, proposals_order
Example #23
    def forward(self, feed_dict):
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']
        multi_stage_loss_units = []
        multi_stage_stats = []
        for i in range(self.num_stages):

            if self.training:
                # proposals_dict
                proposals_dict = {}
                proposals_dict[constants.KEY_PRIMARY] = proposals

                # gt_dict
                gt_dict = {}
                gt_dict[constants.KEY_PRIMARY] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                gt_dict[constants.KEY_CLASSES] = None
                gt_dict[constants.KEY_BOXES_2D] = None
                gt_dict[constants.KEY_CORNERS_2D] = None
                # gt_dict[constants.KEY_CORNERS_VISIBILITY] = None
                # gt_dict[constants.KEY_ORIENTS_V2] = None
                gt_dict[constants.KEY_DIMS] = None

                # auxiliary_dict (used for encoding)
                auxiliary_dict = {}
                auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                    constants.KEY_STEREO_CALIB_P2]
                auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                    constants.KEY_LABEL_CLASSES]
                auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                    constants.KEY_LABEL_BOXES_3D]
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]
                auxiliary_dict[constants.KEY_PROPOSALS] = proposals
                auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                    constants.KEY_MEAN_DIMS]
                auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                    constants.KEY_IMAGE_INFO]

                proposals_dict, loss_units, stats = self.target_generators[
                    i].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)

                # note: rcnn_feat_maps are pyramid features; rois are (N, num_proposals, 5)
                proposals = proposals_dict[constants.KEY_PRIMARY]
            rois = box_ops.box2rois(proposals)
            pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                    rois.view(-1, 5),
                                                    im_info[0][:2])

            # shape(N,C,1,1)
            pooled_feat_for_corners = self.feature_extractor.second_stage_feature(
                pooled_feat)
            pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
            rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
            rcnn_corners_preds = self.rcnn_corners_preds[i](
                pooled_feat_for_corners)

            rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            batch_size = rois.shape[0]
            rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                                   self.n_classes)
            rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                                 self.n_classes)

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
            rcnn_corners_preds = rcnn_corners_preds.view(
                batch_size, rcnn_bbox_preds.shape[1], -1)

            rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

            rcnn_depth_preds = self.rcnn_depth_preds[i](
                pooled_feat_for_corners)

            # rcnn_depth_preds = 1 / (rcnn_depth_preds.sigmoid() + 1e-6) - 1
            rcnn_depth_preds = rcnn_depth_preds.view(batch_size,
                                                     rcnn_bbox_preds.shape[1],
                                                     -1)

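            # fuse the 2D corner regression with the per-box depth prediction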
            rcnn_corners_preds = self.fuse_corners_and_depth(
                rcnn_corners_preds, rcnn_depth_preds)

            if self.training:
                loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
                loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
                loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
                loss_units[
                    constants.KEY_CORNERS_2D]['pred'] = rcnn_corners_preds
                multi_stage_loss_units.append([
                    loss_units[constants.KEY_CLASSES],
                    loss_units[constants.KEY_BOXES_2D],
                    loss_units[constants.KEY_CORNERS_2D],
                    loss_units[constants.KEY_DIMS]
                ])
                multi_stage_stats.append(stats)
            else:
                center_depth = rcnn_corners_preds[:, :, -1:]
                coder = bbox_coders.build(
                    {'type': constants.KEY_CORNERS_2D_NEAREST_DEPTH})
                rcnn_corners_preds = coder.decode_batch(
                    rcnn_corners_preds.detach(), proposals,
                    feed_dict[constants.KEY_STEREO_CALIB_P2])
                coder = bbox_coders.build(
                    self.target_generators[i].
                    target_generator_config['coder_config'])
                proposals = coder.decode_batch(rcnn_bbox_preds,
                                               proposals).detach()
                coder = bbox_coders.build({'type': constants.KEY_DIMS})
                rcnn_dim_preds = coder.decode_batch(
                    rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
                    rcnn_cls_probs).detach()

        if self.training:
            prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            prediction_dict[constants.KEY_STATS] = multi_stage_stats
        else:
            prediction_dict[constants.KEY_CENTER_DEPTH] = center_depth
            prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2].unsqueeze(-1).unsqueeze(-1)

            rcnn_corners_preds[:, :, :, 0] = rcnn_corners_preds[:, :, :, 0] / image_info[:, None, None, 3:4]
            rcnn_corners_preds[:, :, :, 1] = rcnn_corners_preds[:, :, :, 1] / image_info[:, None, None, 2:3]

            prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
            prediction_dict[constants.KEY_BOXES_2D] = proposals
            prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

        if self.training:
            loss_dict = self.loss(prediction_dict, feed_dict)
            return prediction_dict, loss_dict
        else:
            return prediction_dict
Example #24
    def _set_coders(self, name):
        import bbox_coders

        config = {'type': name}
        self._coders = bbox_coders.build(config)
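
Every example above funnels through the same entry point, bbox_coders.build(config): the 'type' key of the config dict selects a coder class. The bbox_coders module itself is not part of this listing, so the following is only a minimal, hypothetical sketch of how such a registry-style build() factory is commonly wired up (the register decorator, the registry key, and the Corners3DCoder class below are illustrative assumptions, not the real API):

# Hypothetical sketch of a registry-based factory (illustrative, not the real bbox_coders module).
_CODERS = {}


def register(name):
    """Class decorator mapping a config 'type' string to a coder class."""

    def _wrap(cls):
        _CODERS[name] = cls
        return cls

    return _wrap


def build(config):
    """Instantiate the coder selected by config['type']."""
    coder_type = config['type']
    if coder_type not in _CODERS:
        raise KeyError('unknown coder type: {}'.format(coder_type))
    return _CODERS[coder_type](config)


@register('corners_3d')  # illustrative key; the real keys come from constants.py
class Corners3DCoder(object):
    def __init__(self, config):
        self.config = config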
Example #25
    def forward(self, feed_dict):
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']
        multi_stage_loss_units = []
        multi_stage_stats = []
        for i in range(self.num_stages):

            if self.training:
                # proposals_dict
                proposals_dict = {}
                proposals_dict[constants.KEY_PRIMARY] = proposals

                # gt_dict
                gt_dict = {}
                gt_dict[constants.KEY_PRIMARY] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                gt_dict[constants.KEY_CLASSES] = None
                gt_dict[constants.KEY_BOXES_2D] = None
                gt_dict[constants.KEY_CORNERS_2D] = None
                gt_dict[constants.KEY_DIMS] = None

                # auxiliary_dict(used for encoding)
                auxiliary_dict = {}
                auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                    constants.KEY_STEREO_CALIB_P2]
                auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                    constants.KEY_LABEL_CLASSES]
                auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                    constants.KEY_LABEL_BOXES_3D]
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]
                auxiliary_dict[constants.KEY_PROPOSALS] = proposals
                auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                    constants.KEY_MEAN_DIMS]
                auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                    constants.KEY_IMAGE_INFO]

                proposals_dict, loss_units, stats = self.target_generators[
                    i].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)

                # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
                proposals = proposals_dict[constants.KEY_PRIMARY]
            rois = box_ops.box2rois(proposals)
            pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                    rois.view(-1, 5),
                                                    im_info[0][:2])

            # shape(N,C,1,1)
            pooled_feat_for_corners = self.feature_extractor.second_stage_feature(
                pooled_feat)
            pooled_feat_for_corners = pooled_feat_for_corners.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat_for_corners)
            rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat_for_corners)
            rcnn_corners_preds = self.rcnn_corners_preds[i](
                pooled_feat_for_corners)

            rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat_for_corners)

            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            batch_size = rois.shape[0]
            rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                                   self.n_classes)
            rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                                 self.n_classes)
            if not self.class_agnostic:
                if self.training:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1))

                else:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1))

            # not class_agnostic for dims
            if not self.class_agnostic_3d:
                if self.training:
                    rcnn_dim_preds = self.squeeze_bbox_preds(
                        rcnn_dim_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1),
                        out_c=3)
                else:
                    rcnn_dim_preds = self.squeeze_bbox_preds(
                        rcnn_dim_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1),
                        out_c=3)

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
            rcnn_corners_preds = rcnn_corners_preds.view(
                batch_size, rcnn_bbox_preds.shape[1], -1)

            rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)


            if self.training:
                loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
                loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
                loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
                loss_units[
                    constants.KEY_CORNERS_2D]['pred'] = rcnn_corners_preds
                multi_stage_loss_units.append([
                    loss_units[constants.KEY_CLASSES],
                    loss_units[constants.KEY_BOXES_2D],
                    loss_units[constants.KEY_CORNERS_2D],
                    loss_units[constants.KEY_DIMS]
                ])
                multi_stage_stats.append(stats)

            # decode for next stage
            # corners decode
            corners_coder = bbox_coders.build(
                {'type': constants.KEY_CORNERS_2D_STABLE})
            rcnn_corners_preds = corners_coder.decode_batch(
                rcnn_corners_preds.detach(), proposals)

            # bbox decode
            boxes_coder = bbox_coders.build(
                self.target_generators[i].
                target_generator_config['coder_config'])
            proposals = boxes_coder.decode_batch(rcnn_bbox_preds,
                                                 proposals).detach()

            # dims decode
            dims_coder = bbox_coders.build({'type': constants.KEY_DIMS})
            rcnn_dim_preds = dims_coder.decode_batch(
                rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
                rcnn_cls_probs).detach()

        if self.training:
            prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            prediction_dict[constants.KEY_STATS] = multi_stage_stats
        else:
            prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            image_info = image_info.unsqueeze(1).unsqueeze(1)
            proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[..., 3]
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[..., 2]

            # image_info[..., 3] / [..., 2] are (N, 1, 1), which broadcasts
            # against the (N, M, 8) corner coordinates for any batch size
            rcnn_corners_preds[..., 0] = rcnn_corners_preds[..., 0] / image_info[..., 3]
            rcnn_corners_preds[..., 1] = rcnn_corners_preds[..., 1] / image_info[..., 2]

            prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
            prediction_dict[constants.KEY_BOXES_2D] = proposals
            prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

        if self.training:
            loss_dict = self.loss(prediction_dict, feed_dict)
            return prediction_dict, loss_dict
        else:
            return prediction_dict
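
The loop above is a Cascade R-CNN-style refinement: every stage regresses deltas against the current proposals, decodes them, and detaches the result before the next stage. A stripped-down sketch of just that control flow; stage_heads and decode_fn are stand-ins for the per-stage predictors and coder.decode_batch.

import torch

def cascade_refine(proposals, stage_heads, decode_fn):
    # proposals: (N, M, 4). Each stage's decoded boxes are detached so
    # gradients do not flow across stage boundaries, mirroring the
    # .detach() calls in the forward pass above.
    for head in stage_heads:
        deltas = head(proposals)
        proposals = decode_fn(deltas, proposals).detach()
    return proposals

heads = [lambda p: torch.full_like(p, 0.1) for _ in range(3)]
decode = lambda deltas, props: props + deltas  # toy coder: plain offsets
print(cascade_refine(torch.zeros(1, 2, 4), heads, decode))  # all entries 0.3
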
Example #26
    def forward(self, feed_dict):
        features = self.feature_extractor(feed_dict[constants.KEY_IMAGE])
        y_locs1 = []
        y_locs2 = []
        y_os = []
        y_cls = []

        for i, x in enumerate(features):
            # location out
            loc_feature = self.loc_feature1(x)
            loc1 = self.box_out1(loc_feature)

            N = loc1.size(0)
            loc1 = loc1.permute(0, 2, 3, 1).contiguous()
            loc1 = loc1.view(N, -1, self.num_regress)
            y_locs1.append(loc1)

            loc_feature = torch.cat([x, loc_feature], dim=1)
            loc_feature = self.loc_feature2(loc_feature)
            loc2 = self.box_out2(loc_feature)

            N = loc2.size(0)
            loc2 = loc2.permute(0, 2, 3, 1).contiguous()
            loc2 = loc2.view(N, -1, self.num_regress)
            loc2 += loc1
            y_locs2.append(loc2)

            # os out
            cls_feature = self.cls_feature1(x)
            os_out = self.os_out(cls_feature)
            os_out = os_out.permute(0, 2, 3, 1).contiguous()
            os_out = os_out.view(N, -1, 2)
            y_os.append(os_out)

            cls_feature = torch.cat([x, cls_feature], dim=1)
            cls_feature = self.cls_feature2(cls_feature)
            cls_out = self.cls_out(cls_feature)

            cls_out = cls_out.permute(0, 2, 3, 1).contiguous()
            cls_out = cls_out.view(N, -1, self.num_classes)
            y_cls.append(cls_out)

        loc1_preds = torch.cat(y_locs1, dim=1)
        loc2_preds = torch.cat(y_locs2, dim=1)
        os_preds = torch.cat(y_os, dim=1)
        cls_preds = torch.cat(y_cls, dim=1)

        image_info = feed_dict[constants.KEY_IMAGE_INFO]

        batch_size = loc1_preds.shape[0]

        anchors = self.anchors.cuda()
        anchors = anchors.repeat(batch_size, 1, 1)

        coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
        proposals = coder.decode_batch(loc2_preds, anchors).detach()

        # if self.normlize_anchor:
        # denormalize
        # h = image_info[:, 0].unsqueeze(-1).unsqueeze(-1)
        # w = image_info[:, 1].unsqueeze(-1).unsqueeze(-1)
        # proposals[:, :, ::2] = proposals[:, :, ::2] * w
        # proposals[:, :, 1::2] = proposals[:, :, 1::2] * h

        cls_probs = F.softmax(cls_preds.detach(), dim=-1)
        os_probs = F.softmax(os_preds.detach(), dim=-1)[:, :, 1:]
        os_probs[os_probs <= 0.4] = 0
        final_probs = cls_probs * os_probs
        image_info = feed_dict[constants.KEY_IMAGE_INFO].unsqueeze(
            -1).unsqueeze(-1)

        prediction_dict = {}
        if self.training:

            # anchors = prediction_dict['anchors']
            anchors_dict = {}
            anchors_dict[constants.KEY_PRIMARY] = anchors
            anchors_dict[constants.KEY_BOXES_2D] = loc1_preds
            anchors_dict[constants.KEY_BOXES_2D_REFINE] = loc2_preds
            anchors_dict[constants.KEY_CLASSES] = cls_preds
            anchors_dict[constants.KEY_OBJECTNESS] = os_preds
            # anchors_dict[constants.KEY_FINAL_PROBS] = final_probs

            gt_dict = {}
            gt_dict[constants.KEY_PRIMARY] = feed_dict[
                constants.KEY_LABEL_BOXES_2D]
            gt_dict[constants.KEY_CLASSES] = None
            gt_dict[constants.KEY_BOXES_2D] = None
            gt_dict[constants.KEY_OBJECTNESS] = None
            gt_dict[constants.KEY_BOXES_2D_REFINE] = None

            auxiliary_dict = {}
            label_boxes_2d = feed_dict[constants.KEY_LABEL_BOXES_2D]
            if self.normlize_anchor:
                label_boxes_2d[:, :, ::2] = label_boxes_2d[:, :, ::2] / image_info[:, 1]
                label_boxes_2d[:, :, 1::2] = label_boxes_2d[:, :, 1::2] / image_info[:, 0]
            auxiliary_dict[constants.KEY_BOXES_2D] = label_boxes_2d
            gt_labels = feed_dict[constants.KEY_LABEL_CLASSES]
            auxiliary_dict[constants.KEY_CLASSES] = gt_labels
            auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                constants.KEY_NUM_INSTANCES]
            auxiliary_dict[constants.KEY_PROPOSALS] = anchors

            proposals_dict, targets, stats = self.target_generators.generate_targets(
                anchors_dict, gt_dict, auxiliary_dict, subsample=False)

            # recall
            anchors_dict[constants.KEY_PRIMARY] = proposals
            _, _, second_stage_stats = self.target_generators.generate_targets(
                anchors_dict, gt_dict, auxiliary_dict, subsample=False)

            # precision
            fg_probs, _ = final_probs[:, :, 1:].max(dim=-1)
            fake_match = auxiliary_dict[constants.KEY_FAKE_MATCH]
            second_stage_stats.update(
                Analyzer.analyze_precision(
                    fake_match,
                    fg_probs,
                    feed_dict[constants.KEY_NUM_INSTANCES],
                    thresh=0.3))

            prediction_dict[constants.KEY_STATS] = [stats, second_stage_stats]
            prediction_dict[constants.KEY_TARGETS] = targets
        else:

            prediction_dict[constants.KEY_CLASSES] = final_probs
            # prediction_dict[constants.KEY_OBJECTNESS] = os_preds

            proposals[:, :, ::2] = proposals[:, :, ::2] / image_info[:, 3]
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / image_info[:, 2]
            prediction_dict[constants.KEY_BOXES_2D] = proposals
        return prediction_dict
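
At inference this detector gates class scores with the separate objectness head: final_probs = cls_probs * os_probs, with objectness zeroed at or below 0.4 so low-objectness anchors cannot produce confident detections. A self-contained sketch of that gating:

import torch
import torch.nn.functional as F

def gated_class_probs(cls_preds, os_preds, os_thresh=0.4):
    # cls_preds: (N, A, num_classes) raw class scores per anchor.
    # os_preds:  (N, A, 2) raw background/foreground objectness scores.
    cls_probs = F.softmax(cls_preds, dim=-1)
    os_probs = F.softmax(os_preds, dim=-1)[..., 1:]  # keep P(foreground)
    # Zero out low-objectness anchors, as os_probs[os_probs <= 0.4] = 0 does above.
    os_probs = torch.where(os_probs > os_thresh, os_probs,
                           torch.zeros_like(os_probs))
    return cls_probs * os_probs

print(gated_class_probs(torch.randn(1, 4, 3), torch.randn(1, 4, 2)).shape)  # (1, 4, 3)
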
Example #27
    def forward(self, feed_dict):
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']
        multi_stage_loss_units = []
        multi_stage_stats = []
        for i in range(self.num_stages):

            if self.training:
                # proposals_dict
                proposals_dict = {}
                proposals_dict[constants.KEY_PRIMARY] = proposals

                # gt_dict
                gt_dict = {}
                gt_dict[constants.KEY_PRIMARY] = feed_dict[constants.
                                                           KEY_LABEL_BOXES_2D]
                gt_dict[constants.KEY_CLASSES] = None
                gt_dict[constants.KEY_BOXES_2D] = None
                gt_dict[constants.KEY_ORIENTS_V3] = None
                gt_dict[constants.KEY_DIMS] = None

                # auxiliary_dict(used for encoding)
                auxiliary_dict = {}
                auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                    constants.KEY_STEREO_CALIB_P2]
                auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                    constants.KEY_LABEL_CLASSES]
                auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                    constants.KEY_LABEL_BOXES_3D]
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]
                auxiliary_dict[constants.KEY_PROPOSALS] = proposals
                auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                    constants.KEY_MEAN_DIMS]

                proposals_dict, loss_units, stats = self.target_generators[
                    i].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)

                # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
                proposals = proposals_dict[constants.KEY_PRIMARY]
            rois = box_ops.box2rois(proposals)
            pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                    rois.view(-1, 5),
                                                    im_info[0][:2])

            # shape(N,C,1,1)
            pooled_feat = self.feature_extractor.second_stage_feature(
                pooled_feat)
            pooled_feat = pooled_feat.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
            rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)
            rcnn_orient_preds = self.rcnn_orient_preds[i](pooled_feat)
            rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat)

            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            batch_size = rois.shape[0]
            rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                                   self.n_classes)
            rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                                 self.n_classes)
            if not self.class_agnostic:
                if self.training:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1))
                else:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1))

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
            rcnn_orient_preds = rcnn_orient_preds.view(batch_size, -1,
                                                       4 * self.num_bins)
            rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

            if self.training:
                loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
                loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
                loss_units[constants.KEY_ORIENTS_V3]['pred'] = rcnn_orient_preds
                loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
                multi_stage_loss_units.append([
                    loss_units[constants.KEY_CLASSES],
                    loss_units[constants.KEY_BOXES_2D],
                    loss_units[constants.KEY_ORIENTS_V3],
                    loss_units[constants.KEY_DIMS]
                ])
                multi_stage_stats.append(stats)

            # decode for next stage
            coder = bbox_coders.build({'type': constants.KEY_BOXES_2D})
            proposals = coder.decode_batch(rcnn_bbox_preds, proposals).detach()
            coder = bbox_coders.build({'type': constants.KEY_DIMS})
            rcnn_dim_preds = coder.decode_batch(
                rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
                rcnn_cls_probs).detach()
            coder = bbox_coders.build({'type': constants.KEY_ORIENTS_V3})
            rcnn_orient_preds = coder.decode_batch(
                rcnn_orient_preds, self.rcnn_orient_loss.bin_centers,
                proposals, feed_dict[constants.KEY_STEREO_CALIB_P2]).detach()

        if self.training:
            prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            prediction_dict[constants.KEY_STATS] = multi_stage_stats
        else:
            prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs
            prediction_dict[constants.KEY_ORIENTS_V3] = rcnn_orient_preds

            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            scale_x = image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
            scale_y = image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
            proposals[:, :, ::2] = proposals[:, :, ::2] / scale_x
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / scale_y
            prediction_dict[constants.KEY_BOXES_2D] = proposals
            prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

        return prediction_dict
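
Example #27 regresses 4 * num_bins orientation channels and decodes them against self.rcnn_orient_loss.bin_centers, which suggests a MultiBin-style scheme (Mousavian et al.): classify the angle into a bin, then regress a residual around that bin's center. The exact channel layout of KEY_ORIENTS_V3 is not shown in these snippets, so the following is only an illustrative sketch of the general decode:

import torch

def decode_multibin_angle(bin_scores, bin_offsets, bin_centers):
    # bin_scores:  (N, M, B) confidence per orientation bin.
    # bin_offsets: (N, M, B) regressed residual angle per bin, in radians.
    # bin_centers: (B,) center angle of each bin.
    best = bin_scores.argmax(dim=-1)                                  # (N, M)
    offset = torch.gather(bin_offsets, -1, best.unsqueeze(-1)).squeeze(-1)
    angle = bin_centers[best] + offset
    return torch.atan2(torch.sin(angle), torch.cos(angle))  # wrap to (-pi, pi]

scores = torch.tensor([[[0.1, 0.9]]])
offsets = torch.tensor([[[0.0, 0.2]]])
centers = torch.tensor([0.0, 1.5708])
print(decode_multibin_angle(scores, offsets, centers))  # ~1.7708 rad
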
Example #28
    def forward(self, feed_dict):
        im_info = feed_dict[constants.KEY_IMAGE_INFO]

        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
            feed_dict[constants.KEY_IMAGE])
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        proposals = prediction_dict['proposals']
        multi_stage_loss_units = []
        multi_stage_stats = []
        for i in range(self.num_stages):

            if self.training:
                # proposals_dict
                proposals_dict = {}
                proposals_dict[constants.KEY_PRIMARY] = proposals

                # gt_dict
                gt_dict = {}
                gt_dict[constants.KEY_PRIMARY] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                gt_dict[constants.KEY_CLASSES] = None
                gt_dict[constants.KEY_BOXES_2D] = None
                gt_dict[constants.KEY_CORNERS_3D_GRNET] = None
                # gt_dict[constants.KEY_CORNERS_VISIBILITY] = None
                # gt_dict[constants.KEY_ORIENTS_V2] = None
                gt_dict[constants.KEY_DIMS] = None

                # auxiliary_dict(used for encoding)
                auxiliary_dict = {}
                auxiliary_dict[constants.KEY_STEREO_CALIB_P2] = feed_dict[
                    constants.KEY_STEREO_CALIB_P2]
                auxiliary_dict[constants.KEY_BOXES_2D] = feed_dict[
                    constants.KEY_LABEL_BOXES_2D]
                auxiliary_dict[constants.KEY_CLASSES] = feed_dict[
                    constants.KEY_LABEL_CLASSES]
                auxiliary_dict[constants.KEY_BOXES_3D] = feed_dict[
                    constants.KEY_LABEL_BOXES_3D]
                auxiliary_dict[constants.KEY_NUM_INSTANCES] = feed_dict[
                    constants.KEY_NUM_INSTANCES]
                auxiliary_dict[constants.KEY_PROPOSALS] = proposals
                auxiliary_dict[constants.KEY_MEAN_DIMS] = feed_dict[
                    constants.KEY_MEAN_DIMS]
                auxiliary_dict[constants.KEY_IMAGE_INFO] = feed_dict[
                    constants.KEY_IMAGE_INFO]

                proposals_dict, loss_units, stats = self.target_generators[
                    i].generate_targets(proposals_dict, gt_dict,
                                        auxiliary_dict)

                # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
                proposals = proposals_dict[constants.KEY_PRIMARY]
            rois = box_ops.box2rois(proposals)
            pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                    rois.view(-1, 5),
                                                    im_info[0][:2])

            # shape(N,C,1,1)
            pooled_feat = self.feature_extractor.second_stage_feature(
                pooled_feat)
            pooled_feat = pooled_feat.mean(3).mean(2)

            rcnn_bbox_preds = self.rcnn_bbox_preds[i](pooled_feat)
            rcnn_cls_scores = self.rcnn_cls_preds[i](pooled_feat)
            rcnn_corners_preds = self.rcnn_corners_preds[i](pooled_feat)
            # rcnn_visibility_preds = self.rcnn_visibility_preds[i](pooled_feat)
            rcnn_dim_preds = self.rcnn_dim_preds[i](pooled_feat)

            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            batch_size = rois.shape[0]
            rcnn_cls_scores = rcnn_cls_scores.view(batch_size, -1,
                                                   self.n_classes)
            rcnn_cls_probs = rcnn_cls_probs.view(batch_size, -1,
                                                 self.n_classes)
            if not self.class_agnostic:
                if self.training:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1))

                else:
                    rcnn_bbox_preds = self.squeeze_bbox_preds(
                        rcnn_bbox_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1))

            # not class_agnostic for dims
            if not self.class_agnostic_3d:
                if self.training:
                    rcnn_dim_preds = self.squeeze_bbox_preds(
                        rcnn_dim_preds,
                        loss_units[constants.KEY_CLASSES]['target'].view(-1),
                        out_c=3)
                else:
                    rcnn_dim_preds = self.squeeze_bbox_preds(
                        rcnn_dim_preds,
                        rcnn_cls_probs.argmax(dim=-1).view(-1),
                        out_c=3)

            rcnn_bbox_preds = rcnn_bbox_preds.view(batch_size, -1, 4)
            rcnn_corners_preds = rcnn_corners_preds.view(
                batch_size, rcnn_bbox_preds.shape[1], -1)
            # rcnn_visibility_preds = rcnn_visibility_preds.view(
            # batch_size, rcnn_bbox_preds.shape[1], -1)
            rcnn_dim_preds = rcnn_dim_preds.view(batch_size, -1, 3)

            if self.training:
                loss_units[constants.KEY_CLASSES]['pred'] = rcnn_cls_scores
                loss_units[constants.KEY_BOXES_2D]['pred'] = rcnn_bbox_preds
                loss_units[constants.KEY_DIMS]['pred'] = rcnn_dim_preds
                loss_units[constants.KEY_CORNERS_3D_GRNET][
                    'pred'] = rcnn_corners_preds
                # loss_units[constants.KEY_CORNERS_VISIBILITY][
                # 'pred'] = rcnn_visibility_preds
                multi_stage_loss_units.append([
                    loss_units[constants.KEY_CLASSES],
                    loss_units[constants.KEY_BOXES_2D],
                    loss_units[constants.KEY_CORNERS_3D_GRNET],
                    loss_units[constants.KEY_DIMS]
                ])
                multi_stage_stats.append(stats)
            else:

                # decode for next stage
                coder = bbox_coders.build(
                    {'type': constants.KEY_CORNERS_3D_GRNET})
                rcnn_corners_preds = coder.decode_batch(
                    rcnn_corners_preds.detach(), proposals,
                    feed_dict[constants.KEY_STEREO_CALIB_P2])
                coder = bbox_coders.build(
                    self.target_generators[i]
                    .target_generator_config['coder_config'])
                proposals = coder.decode_batch(rcnn_bbox_preds,
                                               proposals).detach()
                coder = bbox_coders.build({'type': constants.KEY_DIMS})
                rcnn_dim_preds = coder.decode_batch(
                    rcnn_dim_preds, feed_dict[constants.KEY_MEAN_DIMS],
                    rcnn_cls_probs).detach()

        if self.training:
            prediction_dict[constants.KEY_TARGETS] = multi_stage_loss_units
            prediction_dict[constants.KEY_STATS] = multi_stage_stats
            prediction_dict[constants.KEY_PROPOSALS] = proposals
        else:
            prediction_dict[constants.KEY_CLASSES] = rcnn_cls_probs

            image_info = feed_dict[constants.KEY_IMAGE_INFO]
            scale_x = image_info[:, 3].unsqueeze(-1).unsqueeze(-1)
            scale_y = image_info[:, 2].unsqueeze(-1).unsqueeze(-1)
            proposals[:, :, ::2] = proposals[:, :, ::2] / scale_x
            proposals[:, :, 1::2] = proposals[:, :, 1::2] / scale_y

            # rescale the decoded 2D corners with the same x/y scales; the
            # (N, 1, 1) scale tensors broadcast correctly for any batch size
            N, M = rcnn_corners_preds.shape[:2]
            rcnn_corners_preds = rcnn_corners_preds.view(N, M, 8, 2)
            rcnn_corners_preds[..., 0] = rcnn_corners_preds[..., 0] / scale_x
            rcnn_corners_preds[..., 1] = rcnn_corners_preds[..., 1] / scale_y
            prediction_dict[constants.KEY_CORNERS_2D] = rcnn_corners_preds
            prediction_dict[constants.KEY_BOXES_2D] = proposals
            prediction_dict[constants.KEY_DIMS] = rcnn_dim_preds

        if self.training:
            loss_dict = self.loss(prediction_dict, feed_dict)
            return prediction_dict, loss_dict
        else:
            return prediction_dict
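
The corners handled by the KEY_CORNERS_3D_GRNET coder come from 3D box geometry; mapping camera-frame 3D points into the image with a KITTI-style P2 matrix is a standard pinhole projection. A minimal sketch (the calibration numbers below are made up for illustration):

import torch

def project_points_3d_to_2d(points_3d, p2):
    # points_3d: (K, 3) points in the camera frame.
    # p2:        (3, 4) projection matrix.
    ones = torch.ones(points_3d.shape[0], 1, dtype=points_3d.dtype)
    points_h = torch.cat([points_3d, ones], dim=-1)  # homogeneous (K, 4)
    projected = points_h @ p2.t()                    # (K, 3)
    return projected[:, :2] / projected[:, 2:3]      # divide by depth

p2 = torch.tensor([[700., 0., 600., 0.],
                   [0., 700., 180., 0.],
                   [0., 0., 1., 0.]])
point = torch.tensor([[0., 0., 10.]])  # 10 m straight ahead
print(project_points_3d_to_2d(point, p2))  # [[600., 180.]] = principal point
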
Example #29
    def __init__(self, config):
        self.coder = bbox_coders.build(config)
        self.bg_thresh = config.get('bg_thresh', 0)