Ejemplo n.º 1
0
def im_detect_2d(model, data, eval_config, im_orig=None):
    im_info = data['im_info']
    with torch.no_grad():
        prediction = model(data)

    if eval_config.get('feat_vis'):
        featmaps_dict = model.get_feat()
        from utils.visualizer import FeatVisualizer
        feat_visualizer = FeatVisualizer()
        feat_visualizer.visualize_maps(featmaps_dict)

    cls_prob = prediction['rcnn_cls_probs']
    rois = prediction['rois_batch']
    bbox_pred = prediction['rcnn_bbox_preds']
    anchors = prediction['second_rpn_anchors'][0]

    scores = cls_prob
    im_scale = im_info[0][2]
    boxes = rois.data[:, :, 1:5]
    if prediction.get('rois_scores') is not None:
        rois_scores = prediction['rois_scores']
        boxes = torch.cat([boxes, rois_scores], dim=2)

    # visualize rois
    if im_orig is not None and eval_config['rois_vis']:
        visualize_bbox(im_orig.numpy()[0], boxes.cpu().numpy()[0], save=True)
        # visualize_bbox(im_orig.numpy()[0], anchors[0].cpu().numpy()[:100], save=True)

    if eval_config['bbox_reg']:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        #  if eval_config['bbox_normalize_targets_precomputed']:
        #  # Optionally normalize targets by a precomputed mean and stdev
        if not eval_config['class_agnostic']:
            boxes = boxes.repeat(1, 1, len(eval_config['classes']) + 1)
        #  box_deltas = box_deltas.view(
        #  -1, 4) * torch.FloatTensor(eval_config[
        #  'bbox_normalize_stds']).cuda() + torch.FloatTensor(
        #  eval_config['bbox_normalize_means']).cuda()
        #  box_deltas = box_deltas.view(eval_config['batch_size'], -1, 4)
        #  else:
        #  box_deltas = box_deltas.view(
        #  -1, 4) * torch.FloatTensor(eval_config[
        #  'bbox_normalize_stds']).cuda() + torch.FloatTensor(
        #  eval_config['bbox_normalize_means']).cuda()
        #  box_deltas = box_deltas.view(eval_config['batch_size'], -1,
        #  4 * len(eval_config['classes']))

        #  pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)

        pred_boxes = model.target_assigner.bbox_coder.decode_batch(
            box_deltas.view(eval_config['batch_size'], -1, 4),
            boxes.view(eval_config['batch_size'], -1, 4))
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    pred_boxes /= im_scale

    return pred_boxes, scores, rois[:, :, 1:5], anchors
Ejemplo n.º 2
0
    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        self.visualizer = FeatVisualizer()
Ejemplo n.º 3
0
    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        self.visualizer = FeatVisualizer()

        self.num_bins = 4

        self.train_3d = False

        self.train_2d = not self.train_3d

        # more accurate bbox for 3d prediction
        if self.train_3d:
            fg_thresh = 0.6
        else:
            fg_thresh = 0.5
        model_config['target_assigner_config']['fg_thresh'] = fg_thresh

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        self.profiler = Profiler()

        self.h_cat = False
Ejemplo n.º 4
0
    def test_corners_3d(self, dataloader, model, logger):
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before forward pass
            model.enable_feat_vis()
        end_time = 0

        for step, data in enumerate(dataloader):
            # start_time = time.time()
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction, _, _ = model(data)
            # duration_time = time.time() - start_time

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            # initialize dets for each classes
            # dets = [[] for class_ind in range(self.n_classes)]

            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]
            #  dims = prediction[constants.KEY_DIMS]
            corners_2d = prediction[constants.KEY_CORNERS_2D]
            #  import ipdb
            #  ipdb.set_trace()
            p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

            # rcnn_3d = prediction['rcnn_3d']
            batch_size = scores.shape[0]
            scores = scores.view(-1, self.n_classes)
            new_scores = torch.zeros_like(scores)
            _, scores_argmax = scores.max(dim=-1)
            row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
            new_scores[row, scores_argmax] = scores[row, scores_argmax]
            scores = new_scores.view(batch_size, -1, self.n_classes)

            #  if step == 6:
            #  import ipdb
            #  ipdb.set_trace()

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                #  dims_per_img = dims[batch_ind]
                corners_2d_per_img = corners_2d[batch_ind]
                p2_per_img = p2[batch_ind]

                num_cols = corners_2d.shape[-1]
                dets = [np.zeros((0, 8, num_cols), dtype=np.float32)]
                dets_2d = [np.zeros((0, 4), dtype=np.float32)]

                for class_ind in range(1, self.n_classes):
                    # cls thresh
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > self.thresh).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        # if self.class_agnostic:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        #  threshed_dims_per_img = dims_per_img[inds]
                        threshed_corners_2d_per_img = corners_2d_per_img[inds]
                        # threshed_rcnn_3d_per_img = rcnn_3d_per_img[inds]
                        # else:
                        # threshed_boxes_2d_per_img = boxes_2d_per_img[
                        # inds, class_ind * 4:class_ind * 4 + 4]
                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat(
                            [
                                threshed_boxes_2d_per_img,
                                threshed_scores_per_img.unsqueeze(-1),
                                #  threshed_dims_per_img,
                            ],
                            dim=-1)

                        # sort by scores
                        _, order = torch.sort(threshed_scores_per_img, 0, True)
                        threshed_dets_per_img = threshed_dets_per_img[order]
                        threshed_corners_2d_per_img = threshed_corners_2d_per_img[
                            order]

                        # nms
                        keep = nms(threshed_dets_per_img[:, :4],
                                   threshed_dets_per_img[:, 4],
                                   self.nms).view(-1).long()
                        nms_dets_per_img = threshed_dets_per_img[keep].detach(
                        ).cpu().numpy()
                        nms_corners_2d_per_img = threshed_corners_2d_per_img[
                            keep].detach().cpu().numpy()

                        dets.append(nms_corners_2d_per_img)
                        dets_2d.append(nms_dets_per_img[:, :4])
                    else:
                        dets.append(
                            np.zeros((0, 8, num_cols), dtype=np.float32))
                        dets_2d.append(np.zeros((0, 4)))

                # import ipdb
                # ipdb.set_trace()
                corners = np.concatenate(dets, axis=0)
                dets_2d = np.concatenate(dets_2d, axis=0)
                corners_2d = None
                corners_3d = None
                if num_cols == 3:
                    corners_3d = corners
                else:
                    corners_2d = corners

                self.visualizer.render_image_corners_2d(
                    image_path[0],
                    boxes_2d=dets_2d,
                    corners_2d=corners_2d,
                    corners_3d=corners_3d,
                    p2=p2_per_img.cpu().numpy())

                duration_time = time.time() - end_time
                #  label_path = self._generate_label_path(image_path[batch_ind])
                #  self.save_mono_3d_dets(dets, label_path)
                sys.stdout.write('\r{}/{},duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()

                end_time = time.time()
Ejemplo n.º 5
0
    def test_super_nms(self, dataloader, model, logger):
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before forward pass
            model.enable_feat_vis()
        end_time = 0

        for step, data in enumerate(dataloader):
            # start_time = time.time()
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction = model(data)
            # duration_time = time.time() - start_time

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            # initialize dets for each classes
            # dets = [[] for class_ind in range(self.n_classes)]
            dets = [[]]

            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]

            batch_size = scores.shape[0]
            # scores = scores.view(-1, self.n_classes)
            # new_scores = torch.zeros_like(scores)
            # _, scores_argmax = scores.max(dim=-1)
            # row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
            # new_scores[row, scores_argmax] = scores[row, scores_argmax]
            # scores = new_scores.view(batch_size, -1, self.n_classes)

            #  if step == 6:
            #  import ipdb
            #  ipdb.set_trace()

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                for class_ind in range(1, self.n_classes):
                    # cls thresh
                    # import ipdb
                    # ipdb.set_trace()
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > 0.01).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        # if self.class_agnostic:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        # else:
                        # threshed_boxes_2d_per_img = boxes_2d_per_img[
                        # inds, class_ind * 4:class_ind * 4 + 4]
                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat([
                            threshed_boxes_2d_per_img,
                            threshed_scores_per_img.unsqueeze(-1),
                        ],
                                                          dim=-1)

                        # sort by scores
                        _, order = torch.sort(threshed_scores_per_img, 0, True)
                        threshed_dets_per_img = threshed_dets_per_img[order]

                        # nms
                        # keep = nms(threshed_dets_per_img[:, :4],
                        # threshed_dets_per_img[:, 4],
                        # self.nms).view(-1).long()
                        keep = box_ops.super_nms(threshed_dets_per_img[:, :4],
                                                 0.8,
                                                 nms_num=3,
                                                 loop_time=2)
                        nms_dets_per_img = threshed_dets_per_img[keep].detach(
                        ).cpu().numpy()

                        dets.append(nms_dets_per_img)
                    else:
                        dets.append([])

                duration_time = time.time() - end_time
                label_path = self._generate_label_path(image_path[batch_ind])
                self.save_dets(dets, label_path)
                sys.stdout.write('\r{}/{},duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()

                end_time = time.time()
Ejemplo n.º 6
0
    def test_3d(self, dataloader, model, logger):
        self.logger.info('Start testing')
        num_samples = len(dataloader)

        if self.feat_vis:
            # enable it before forward pass
            model.enable_feat_vis()
        end_time = 0

        for step, data in enumerate(dataloader):
            # start_time = time.time()
            data = common.to_cuda(data)
            image_path = data[constants.KEY_IMAGE_PATH]

            with torch.no_grad():
                prediction, _, _ = model(data)
            # duration_time = time.time() - start_time

            if self.feat_vis:
                featmaps_dict = model.get_feat()
                from utils.visualizer import FeatVisualizer
                feat_visualizer = FeatVisualizer()
                feat_visualizer.visualize_maps(featmaps_dict)

            # initialize dets for each classes
            # dets = [[] for class_ind in range(self.n_classes)]
            dets = [[]]

            scores = prediction[constants.KEY_CLASSES]
            boxes_2d = prediction[constants.KEY_BOXES_2D]
            dims = prediction[constants.KEY_DIMS]
            orients = prediction[constants.KEY_ORIENTS_V2]
            p2 = data[constants.KEY_STEREO_CALIB_P2_ORIG]

            # rcnn_3d = prediction['rcnn_3d']
            batch_size = scores.shape[0]
            scores = scores.view(-1, self.n_classes)
            new_scores = torch.zeros_like(scores)
            _, scores_argmax = scores.max(dim=-1)
            row = torch.arange(0, scores_argmax.numel()).type_as(scores_argmax)
            new_scores[row, scores_argmax] = scores[row, scores_argmax]
            scores = new_scores.view(batch_size, -1, self.n_classes)

            #  if step == 6:
            #  import ipdb
            #  ipdb.set_trace()

            for batch_ind in range(batch_size):
                boxes_2d_per_img = boxes_2d[batch_ind]
                scores_per_img = scores[batch_ind]
                dims_per_img = dims[batch_ind]
                orients_per_img = orients[batch_ind]
                p2_per_img = p2[batch_ind]
                # rcnn_3d_per_img = rcnn_3d[batch_ind]
                for class_ind in range(1, self.n_classes):
                    # cls thresh
                    inds = torch.nonzero(
                        scores_per_img[:, class_ind] > self.thresh).view(-1)
                    threshed_scores_per_img = scores_per_img[inds, class_ind]
                    if inds.numel() > 0:
                        # if self.class_agnostic:
                        threshed_boxes_2d_per_img = boxes_2d_per_img[inds]
                        threshed_dims_per_img = dims_per_img[inds]
                        threshed_orients_per_img = orients_per_img[inds]
                        # threshed_rcnn_3d_per_img = rcnn_3d_per_img[inds]
                        # else:
                        # threshed_boxes_2d_per_img = boxes_2d_per_img[
                        # inds, class_ind * 4:class_ind * 4 + 4]
                        # concat boxes and scores
                        threshed_dets_per_img = torch.cat([
                            threshed_boxes_2d_per_img,
                            threshed_scores_per_img.unsqueeze(-1),
                            threshed_dims_per_img,
                            threshed_orients_per_img.unsqueeze(-1)
                        ],
                                                          dim=-1)

                        # sort by scores
                        _, order = torch.sort(threshed_scores_per_img, 0, True)
                        threshed_dets_per_img = threshed_dets_per_img[order]
                        # threshed_rcnn_3d_per_img = threshed_rcnn_3d_per_img[order]

                        # nms
                        keep = nms(threshed_dets_per_img[:, :4],
                                   threshed_dets_per_img[:, 4],
                                   self.nms).view(-1).long()
                        nms_dets_per_img = threshed_dets_per_img[keep].detach(
                        ).cpu().numpy()
                        # nms_rcnn_3d_per_img = threshed_rcnn_3d_per_img[keep].detach().cpu().numpy()

                        # calculate location
                        location = geometry_utils.calc_location(
                            nms_dets_per_img[:, 5:8], nms_dets_per_img[:, :5],
                            nms_dets_per_img[:, 8],
                            p2_per_img.cpu().numpy())
                        # import ipdb
                        # ipdb.set_trace()
                        # location, _ = mono_3d_postprocess_bbox(
                        # nms_rcnn_3d_per_img, nms_dets_per_img[:, :5],
                        # p2_per_img.cpu().numpy())
                        nms_dets_per_img = np.concatenate([
                            nms_dets_per_img[:, :5], nms_dets_per_img[:, 5:8],
                            location, nms_dets_per_img[:, -1:]
                        ],
                                                          axis=-1)
                        # nms_dets_per_img = np.concatenate(
                        # [nms_dets_per_img[:, :5], location], axis=-1)

                        dets.append(nms_dets_per_img)
                    else:
                        dets.append([])

                duration_time = time.time() - end_time
                label_path = self._generate_label_path(image_path[batch_ind])
                self.save_mono_3d_dets(dets, label_path)
                sys.stdout.write('\r{}/{},duration: {}'.format(
                    step + 1, num_samples, duration_time))
                sys.stdout.flush()

                end_time = time.time()
Ejemplo n.º 7
0
def oft_test(eval_config, data_loader, model):
    """
    Only one image in batch is supported
    """
    # import ipdb
    # ipdb.set_trace()
    num_samples = len(data_loader)
    end_time = 0
    for i, data in enumerate(data_loader):
        img_file = data['img_name']
        start_time = time.time()

        with torch.no_grad():
            data = to_cuda(data)
            prediction = model(data)

        if eval_config.get('feat_vis'):
            featmaps_dict = model.get_feat()
            from utils.visualizer import FeatVisualizer
            feat_visualizer = FeatVisualizer()
            feat_visualizer.visualize_maps(featmaps_dict)

        pred_probs_3d = prediction['pred_probs_3d']
        pred_boxes_3d = prediction['pred_boxes_3d']

        duration_time = time.time() - start_time

        scores = pred_probs_3d.squeeze()
        pred_boxes_3d = pred_boxes_3d.squeeze()

        classes = eval_config['classes']
        thresh = eval_config['thresh']
        thresh = 0.1
        #  import ipdb
        #  ipdb.set_trace()

        dets = []
        # nms
        for j in range(1, len(classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                cls_scores, order = torch.sort(cls_scores, 0, True)
                if eval_config['class_agnostic']:
                    pred_boxes_3d = pred_boxes_3d[inds, :]
                else:
                    pred_boxes_3d = pred_boxes_3d[inds][:, j * 4:(j + 1) * 4]

                pred_boxes_3d = pred_boxes_3d[order]

                # keep = nms(pred_boxes_3d, eval_config['nms'])

                # pred_boxes_3d = pred_boxes_3d[keep.view(-1).long()]

                pred_boxes_3d = pred_boxes_3d.detach().cpu().numpy()
                p2 = data['orig_p2'][0].detach().cpu().numpy()
                cls_scores = cls_scores.cpu().numpy()

                cls_boxes = proj_3dTo2d(pred_boxes_3d, p2)

                # import ipdb
                # ipdb.set_trace()
                cls_dets = np.concatenate(
                    (cls_boxes, cls_scores[..., np.newaxis]), 1)

                # img filter(ignore outside of image)
                img_filter = get_img_filter(cls_dets)
                final_dets = np.concatenate([cls_dets, pred_boxes_3d], axis=-1)
                final_dets = final_dets[img_filter]
                dets.append(final_dets)

            else:
                dets.append([])

        save_dets(dets, img_file[0], 'kitti', eval_config['eval_out'])

        sys.stdout.write('\r{}/{},duration: {}'.format(i + 1, num_samples,
                                                       duration_time))
        sys.stdout.flush()