def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

            # Check if this is a training run with image augmentation enabled
            if self.train_val_test == 'train' and self.is_train_aug:
                # Apply a randomly chosen augmentation with probability 0.5
                if np.random.uniform(0, 1) > 0.5:
                    # Pick one of the available augmentation methods and
                    # apply it to the RGB channels of the image
                    random_aug = random.choice(self.augs)
                    aug_fn = getattr(kitti_aug, random_aug)
                    image_input[:, :, 0:3] = aug_fn(image_input[:, :, 0:3])

            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                if obj_labels is not None:
                    obj_labels = [
                        kitti_aug.flip_label_in_3d_only(obj)
                        for obj in obj_labels
                    ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :,
                            0:3] = kitti_aug.apply_pca_jitter(image_input[:, :,
                                                                          0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
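A hedged usage sketch for the method above (assuming a `dataset` object built via DatasetBuilder.build_kitti_dataset, which exposes this load_samples):

# Fetch a single sample dict by index (index 0 is illustrative)
sample_dicts = dataset.load_samples([0])
sample = sample_dicts[0]
bev_input = sample[constants.KEY_BEV_INPUT]      # stacked BEV maps, H x W x C
image_input = sample[constants.KEY_IMAGE_INPUT]  # RGB image array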
Example #2
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib = calib_utils.read_calibration(
                self.calib_dir, int(sample_name))
            stereo_calib_p2 = stereo_calib.p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

            # Augmentation (Flipping)
            # WZN: the flipping augmentation flips the image (camera frame),
            # the point cloud (lidar frame), and the calibration matrix
            # (between camera and lidar), so their correspondence is preserved.
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                if obj_labels is not None:
                    obj_labels = [
                        kitti_aug.flip_label_in_3d_only(obj)
                        for obj in obj_labels
                    ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :,
                            0:3] = kitti_aug.apply_pca_jitter(image_input[:, :,
                                                                          0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane, output_indices=self.output_indices)

            # WZN: unpack the extra outputs used for sparse pooling
            if self.output_indices:
                voxel_indices = bev_images[1]
                pts_in_voxel = bev_images[2]
                bev_images = bev_images[0]

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            # WZN: produce input for sparse pooling
            if self.output_indices:
                sparse_pooling_input1 = produce_sparse_pooling_input(
                    gen_sparse_pooling_input_avod(
                        pts_in_voxel, voxel_indices, stereo_calib,
                        [image_shape[1], image_shape[0]],
                        bev_input.shape[0:2]),
                    stride=[1, 1])
                # WZN: AVOD pads the VGG input height by 4, so add it here
                bev_input_padded = np.copy(bev_input.shape[0:2])
                bev_input_padded[0] = bev_input_padded[0] + 4
                sparse_pooling_input2 = produce_sparse_pooling_input(
                    gen_sparse_pooling_input_avod(
                        pts_in_voxel, voxel_indices, stereo_calib,
                        [image_shape[1], image_shape[0]], bev_input_padded),
                    stride=[8, 8])
                sparse_pooling_input = [
                    sparse_pooling_input1, sparse_pooling_input2
                ]
            else:
                sparse_pooling_input = None

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                #WZN: for sparse pooling
                constants.KEY_SPARSE_POOLING_INPUT: sparse_pooling_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
    def _calculate_anchors_info(self, all_anchor_boxes_bev,
                                empty_anchor_filter, gt_labels):
        """Calculates the list of anchor information in the format:
            N x 8 [max_gt_2d_iou_r, max_gt_2d_iou_h, (6 x offsets), class_index]
                max_gt_out - highest 2D iou with any ground truth box, using [anchor_r vs gt_r] or [anchor_h vs gt_h]
                offsets - encoded offsets [dx, dy, d_dimx, d_dimy, d_angle, angle_face_class_index, (-180,0) or (0,180)]
                class_index - the anchor's class as an index
                    (e.g. 0 or 1, for "Background" or "Car")

        Args:
            all_anchor_boxes_3d: list of anchors in box_3d format
                N x [xc, yc, w, h, angle]
            empty_anchor_filter: boolean mask of which anchors are non empty
            gt_labels: list of Object Label data format containing ground truth
                labels to generate positives/negatives from.

        Returns:
            list of anchor info
        """
        # Check for ground truth objects
        if len(gt_labels) == 0:
            raise Warning("No valid ground truth label to generate anchors.")

        kitti_utils = self._dataset.kitti_utils

        # Filter empty anchors
        anchor_indices = np.where(empty_anchor_filter)[0]
        anchors = all_anchor_boxes_bev[empty_anchor_filter]

        # Convert gt to boxes_3d -> anchors -> iou format
        gt_boxes_3d = np.asarray([
            box_3d_encoder.object_label_to_box_3d(gt_obj)
            for gt_obj in gt_labels
        ])
        gt_anchors_norm, _ = box_3d_projector.project_to_bev_box(
            gt_boxes_3d, self._area_extents[[0, 2]])
        bev_map_h, bev_map_w = self._bev_shape
        # (N, 5) * (5,): element-wise multiplication with broadcasting
        gt_anchors = np.multiply(
            gt_anchors_norm,
            np.array([bev_map_w, bev_map_h, bev_map_w, bev_map_h, 1]))
        iou_type = self.mini_batch_utils.retinanet_iou_type
        if iou_type == '2d_rotate':
            # Convert anchors to 2d iou format
            anchors_for_2d_iou_r = anchors
            gt_boxes_for_2d_iou_r = gt_anchors
        elif iou_type == '2d':
            # Convert anchors to 3d iou format for calculation
            anchors_for_2d_iou_h = box_bev_encoder.box_bev_to_iou_h_format(
                anchors)
            anchors_for_2d_iou_h = anchors_for_2d_iou_h.astype(np.int32)
            gt_boxes_for_2d_iou_h = box_bev_encoder.box_bev_to_iou_h_format(
                gt_anchors)
            gt_boxes_for_2d_iou_h = gt_boxes_for_2d_iou_h.astype(np.int32)
        else:
            raise ValueError('Invalid retinanet iou_type {}'.format(iou_type))
        # Initialize sample and offset lists
        num_anchors = len(anchors)
        all_info = np.zeros((num_anchors, self.mini_batch_utils.col_length))
        # Update anchor indices
        all_info[:, self.mini_batch_utils.col_anchor_indices] = anchor_indices

        # For each of the labels, generate samples
        for gt_idx in range(len(gt_labels)):

            gt_obj = gt_labels[gt_idx]
            gt_box_3d = box_3d_encoder.object_label_to_box_3d(gt_obj)

            # Get 2D or 3D IoU for every anchor
            if self.mini_batch_utils.retinanet_iou_type == '2d':
                gt_box_for_2d_iou_h = gt_boxes_for_2d_iou_h[gt_idx]
                ious = evaluation.two_d_iou(gt_box_for_2d_iou_h,
                                            anchors_for_2d_iou_h)
            elif self.mini_batch_utils.retinanet_iou_type == '2d_rotate':
                gt_box_for_2d_iou_r = gt_boxes_for_2d_iou_r[gt_idx]
                ious = evaluation.two_d_rotate_iou(gt_box_for_2d_iou_r,
                                                   anchors_for_2d_iou_r)

            # Only update indices with a higher iou than before
            update_indices = np.greater(
                ious, all_info[:, self.mini_batch_utils.col_ious])

            # Get ious to update
            ious_to_update = ious[update_indices]

            # Calculate offsets, use 3D iou to get highest iou
            anchors_to_update = anchors[update_indices]
            facing_obj_head = gt_obj.ry >= 0  # camera faces the object's head
            gt_anchor = gt_anchors[gt_idx]
            # Map the gt_anchor angle from (-pi, pi) to (-pi, 0)
            if facing_obj_head:
                gt_anchor[-1] -= np.pi
            offsets_boxes = anchor_bev_encoder.anchor_to_offset(
                anchors_to_update, gt_anchor)
            gt_anchor_pred = anchor_bev_encoder.offset_to_anchor(
                anchors_to_update, offsets_boxes)
            # y-axis 3D values (height offsets)
            n_anchor = offsets_boxes.shape[0]
            anchor_h = anchor_bev_encoder.get_default_anchor_h(n_anchor, 'np')
            gt_h = [gt_obj.t[1], gt_obj.h]
            offsets_h = anchor_bev_encoder.anchor_to_offset_h(anchor_h, gt_h)
            gt_anchors_angle = np.zeros_like(offsets_boxes[:, 0]) + gt_obj.ry
            offsets_angle_cls = orientation_encoder.orientation_to_angle_cls(
                gt_anchors_angle)
            offsets = np.hstack(
                [offsets_boxes, offsets_h, offsets_angle_cls[:, np.newaxis]])
            # Convert gt type to index
            class_idx = kitti_utils.class_str_to_index(gt_obj.type)
            # Update anchors info (indices already updated)
            # [index, iou, (offsets), class_index]
            all_info[update_indices,
                     self.mini_batch_utils.col_ious] = ious_to_update
            all_info[update_indices, self.mini_batch_utils.col_offsets_lo:self.
                     mini_batch_utils.col_offsets_hi] = offsets
            all_info[update_indices,
                     self.mini_batch_utils.col_class_idx] = class_idx
            debug = False  # set to True for per-label debug prints
            if debug:
                print(f'gt obj:{gt_box_3d}, gt anchor bev: {gt_anchor}')
                print(f'anchors_to_update: {anchors_to_update[:1]}')
                print(f'update at all_info: \n{all_info[update_indices][:1]}')
                print(f'gt_from_anchor_offsets:\n{gt_anchor_pred[:1]}')

        return all_info
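The per-ground-truth loop above keeps, for each anchor, the info of the ground-truth box with the highest IoU so far. A minimal standalone sketch of that update pattern (plain numpy, toy values):

import numpy as np

# Toy IoUs: rows = ground-truth boxes, columns = anchors
ious_per_gt = np.array([[0.1, 0.7, 0.3],
                        [0.4, 0.2, 0.6]])

best_iou = np.zeros(3)
best_gt = np.full(3, -1)
for gt_idx, ious in enumerate(ious_per_gt):
    update = ious > best_iou      # only overwrite where this gt fits better
    best_iou[update] = ious[update]
    best_gt[update] = gt_idx

print(best_iou)  # [0.4 0.7 0.6]
print(best_gt)   # [ 1  0  1]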
def main():
    """This demo shows RPN proposals and AVOD predictions in the
    3D point cloud.

    Keys:
        F1: Toggle proposals
        F2: Toggle predictions
        F3: Toggle 3D voxel grid
        F4: Toggle point cloud

        F5: Toggle easy ground truth objects (Green)
        F6: Toggle medium ground truth objects (Orange)
        F7: Toggle hard ground truth objects (Red)
        F8: Toggle all ground truth objects (default off)

        F9: Toggle ground slice filter (default off)
        F10: Toggle offset slice filter (default off)
    """

    ##############################
    # Options
    ##############################
    rpn_score_threshold = 0.1
    avod_score_threshold = 0.1

    proposals_line_width = 1.0
    predictions_line_width = 3.0
    show_orientations = True

    point_cloud_source = 'depth'

    # Config file folder, default (<avod_root>/data/outputs/<checkpoint_name>)
    config_dir = None

    checkpoint_name = 'pyramid_cars_with_aug_example'
    # global_step = None  # Use the latest checkpoint
    global_step = 83000

    #data_split = 'val_half'
    data_split = 'val'
    # data_split = 'test'

    # Show 3D iou text
    draw_ious_3d = True

    name_list = []

    # name_file = '/media/wavelab/d3cd89ab-7705-4996-94f3-01da25ba8f50/moosey/val.txt'
    # with open(name_file) as f:
    #     for line in f:
    #         newline = line.replace("\n", "")
    #         name_list.append(newline)

    # name_list = ['0000000003', '0000000009', '0000000016', '0000000233',
    #              '0000000234', '0000000236', '0000000422', '0000000473',
    #              '0000000490', '0000000494', '0000000547', '0000000655',
    #              '0000000679', '0000000690', '0000000692', '0000000781']
    name_list = ['0000000004']

    for names in name_list:

        sample_name = names
        #sample_name = None

        # # # Cars # # #
        # sample_name = '000050'
        # sample_name = '000104'
        # sample_name = '000169'
        # sample_name = '000191'
        # sample_name = '000360'
        # sample_name = '001783'
        # sample_name = '001820'

        # val split
        # sample_name = '000181'
        # sample_name = '000751'
        # sample_name = '000843'
        # sample_name = '000944'
        # sample_name = '006338'

        # # # People # # #
        # val_half split
        # sample_name = '000001'  # Hard, 1 far cyc
        # sample_name = '000005'  # Easy, 1 ped
        # sample_name = '000122'  # Easy, 1 cyc
        # sample_name = '000134'  # Hard, lots of people
        # sample_name = '000167'  # Medium, 1 ped, 2 cycs
        # sample_name = '000187'  # Medium, 1 ped on left
        # sample_name = '000381'  # Easy, 1 ped
        # sample_name = '000398'  # Easy, 1 ped
        # sample_name = '000401'  # Hard, obscured peds
        # sample_name = '000407'  # Easy, 1 ped
        # sample_name = '000448'  # Hard, several far people
        # sample_name = '000486'  # Hard 2 obscured peds
        # sample_name = '000509'  # Easy, 1 ped
        # sample_name = '000718'  # Hard, lots of people
        # sample_name = '002216'  # Easy, 1 cyc

        # val split
        # sample_name = '000015'
        # sample_name = '000048'
        # sample_name = '000058'
        # sample_name = '000076'    # Medium, few ped, 1 cyc
        # sample_name = '000108'
        # sample_name = '000118'
        # sample_name = '000145'
        # sample_name = '000153'
        # sample_name = '000186'
        # sample_name = '000195'
        # sample_name = '000199'
        # sample_name = '000397'
        # sample_name = '004425'
        # sample_name = '004474'    # Hard, many ped, 1 cyc
        # sample_name = '004657'    # Hard, Few cycl, few ped
        # sample_name = '006071'
        # sample_name = '006828'    # Hard, Few cycl, few ped
        # sample_name = '006908'    # Hard, Few cycl, few ped
        # sample_name = '007412'
        # sample_name = '007318'    # Hard, Few cycl, few ped

        ##############################
        # End of Options
        ##############################

        if data_split == 'test':
            draw_ious_3d = False

        if config_dir is None:
            config_dir = avod.root_dir() + '/data/outputs/' + checkpoint_name

        # Parse experiment config
        pipeline_config_file = \
            config_dir + '/' + checkpoint_name + '.config'
        _, _, _, dataset_config = \
            config_builder_util.get_configs_from_pipeline_file(
                pipeline_config_file, is_training=False)

        dataset_config.data_split = data_split

        if data_split == 'test':
            dataset_config.data_split_dir = 'testing'
            dataset_config.has_labels = False

        dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                     use_defaults=False)

        # Random sample
        if sample_name is None:
            sample_idx = np.random.randint(0, dataset.num_samples)
            sample_name = dataset.sample_names[sample_idx]

        ##############################
        # Setup Paths
        ##############################
        img_idx = int(sample_name)

        # Text files directory
        proposals_and_scores_dir = avod.root_dir() + \
            '/data/outputs/' + checkpoint_name + '/predictions' +  \
            '/proposals_and_scores/' + dataset.data_split

        predictions_and_scores_dir = avod.root_dir() + \
            '/data/outputs/' + checkpoint_name + '/predictions' +  \
            '/final_predictions_and_scores/' + dataset.data_split

        # Get checkpoint step
        steps = os.listdir(proposals_and_scores_dir)
        steps.sort(key=int)
        print('Available steps: {}'.format(steps))

        # Use latest checkpoint if no index provided
        if global_step is None:
            global_step = steps[-1]

        # Output images directory
        img_out_dir = avod.root_dir() + '/data/outputs/' + checkpoint_name + \
            '/predictions/images_3d/{}/{}/{}'.format(dataset.data_split,
                                                     global_step,
                                                     rpn_score_threshold)

        if not os.path.exists(img_out_dir):
            os.makedirs(img_out_dir)

        ##############################
        # Proposals
        ##############################
        # Load proposals from files
        proposals_and_scores = np.loadtxt(proposals_and_scores_dir +
                                          "/{}/{}.txt".format(global_step,
                                                              sample_name))

        proposals = proposals_and_scores[:, 0:7]
        proposal_scores = proposals_and_scores[:, 7]

        rpn_score_mask = proposal_scores > rpn_score_threshold

        proposals = proposals[rpn_score_mask]
        proposal_scores = proposal_scores[rpn_score_mask]
        print('Proposals:', len(proposal_scores), proposal_scores)

        proposal_objs = \
            [box_3d_encoder.box_3d_to_object_label(proposal,
                                                   obj_type='Proposal')
             for proposal in proposals]

        ##############################
        # Predictions
        ##############################
        # Load predictions from files
        predictions_and_scores = np.loadtxt(predictions_and_scores_dir +
                                            "/{}/{}.txt".format(
                                                global_step,
                                                sample_name)).reshape(-1, 9)

        prediction_boxes_3d = predictions_and_scores[:, 0:7]
        prediction_scores = predictions_and_scores[:, 7]
        prediction_types = np.asarray(predictions_and_scores[:, 8], dtype=np.int32)

        avod_score_mask = prediction_scores >= avod_score_threshold
        prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
        prediction_scores = prediction_scores[avod_score_mask]
        print('Predictions: ', len(prediction_scores), prediction_scores)

        final_predictions = np.copy(prediction_boxes_3d)

        # # Swap l, w for predictions where w > l
        # swapped_indices = predictions[:, 4] > predictions[:, 3]
        # final_predictions[swapped_indices, 3] = predictions[swapped_indices, 4]
        # final_predictions[swapped_indices, 4] = predictions[swapped_indices, 3]

        prediction_objs = []
        for pred_idx in range(len(final_predictions)):
            prediction_box_3d = final_predictions[pred_idx]
            prediction_type = dataset.classes[prediction_types[pred_idx]]
            prediction_obj = box_3d_encoder.box_3d_to_object_label(
                prediction_box_3d, obj_type=prediction_type)
            prediction_objs.append(prediction_obj)

        ##############################
        # Ground Truth
        ##############################
        if dataset.has_labels:
            # Get ground truth labels
            easy_gt_objs, medium_gt_objs, \
                hard_gt_objs, all_gt_objs = \
                demo_utils.get_gts_based_on_difficulty(dataset, img_idx)
        else:
            easy_gt_objs = medium_gt_objs = hard_gt_objs = all_gt_objs = []

        ##############################
        # 3D IoU
        ##############################
        if draw_ious_3d:
            # Convert to box_3d
            all_gt_boxes_3d = [box_3d_encoder.object_label_to_box_3d(gt_obj)
                               for gt_obj in all_gt_objs]
            pred_boxes_3d = [box_3d_encoder.object_label_to_box_3d(pred_obj)
                             for pred_obj in prediction_objs]
            max_ious_3d = demo_utils.get_max_ious_3d(all_gt_boxes_3d,
                                                     pred_boxes_3d)

        ##############################
        # Point Cloud
        ##############################
        image_path = dataset.get_rgb_image_path(sample_name)
        image = cv2.imread(image_path)

        print("***************")
        print(point_cloud_source)
        print(img_idx)
        print(image.shape)

        point_cloud = dataset.kitti_utils.get_point_cloud(point_cloud_source,
                                                          img_idx,
                                                          image_shape=image.shape)


        print("This is the shape of the point_cloud")
        print(point_cloud.shape)
        point_cloud = np.asarray(point_cloud)

        # Filter point cloud to extents
        area_extents = np.asarray([[-40, 40], [-5, 3], [0, 70]])
        bev_extents = area_extents[[0, 2]]

        points = point_cloud.T
        point_filter = obj_utils.get_point_filter(point_cloud, area_extents)
        points = points[point_filter]

        point_colours = vis_utils.project_img_to_point_cloud(points,
                                                             image,
                                                             dataset.calib_dir,
                                                             img_idx)

        # Voxelize the point cloud for visualization
        voxel_grid = VoxelGrid()
        voxel_grid.voxelize(points, voxel_size=0.1,
                            create_leaf_layout=False)

        # Ground plane
        ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

        ##############################
        # Visualization
        ##############################
        # Create VtkVoxelGrid
        vtk_voxel_grid = VtkVoxelGrid()
        vtk_voxel_grid.set_voxels(voxel_grid)

        vtk_point_cloud = VtkPointCloud()
        vtk_point_cloud.set_points(points, point_colours)

        # Create VtkAxes
        vtk_axes = vtk.vtkAxesActor()
        vtk_axes.SetTotalLength(5, 5, 5)

        # Create VtkBoxes for proposal boxes
        vtk_proposal_boxes = VtkBoxes()
        vtk_proposal_boxes.set_line_width(proposals_line_width)
        vtk_proposal_boxes.set_objects(proposal_objs,
                                       COLOUR_SCHEME_PREDICTIONS)

        # Create VtkBoxes for prediction boxes
        vtk_prediction_boxes = VtkPyramidBoxes()
        vtk_prediction_boxes.set_line_width(predictions_line_width)
        vtk_prediction_boxes.set_objects(prediction_objs,
                                         COLOUR_SCHEME_PREDICTIONS,
                                         show_orientations)

        # Create VtkBoxes for ground truth
        vtk_hard_gt_boxes = VtkBoxes()
        vtk_medium_gt_boxes = VtkBoxes()
        vtk_easy_gt_boxes = VtkBoxes()
        vtk_all_gt_boxes = VtkBoxes()

        vtk_hard_gt_boxes.set_objects(hard_gt_objs, COLOUR_SCHEME_PREDICTIONS,
                                      show_orientations)
        vtk_medium_gt_boxes.set_objects(medium_gt_objs, COLOUR_SCHEME_PREDICTIONS,
                                        show_orientations)
        vtk_easy_gt_boxes.set_objects(easy_gt_objs, COLOUR_SCHEME_PREDICTIONS,
                                      show_orientations)
        vtk_all_gt_boxes.set_objects(all_gt_objs, VtkBoxes.COLOUR_SCHEME_KITTI,
                                     show_orientations)

        # Create VtkTextLabels for 3D ious
        vtk_text_labels = VtkTextLabels()

        if draw_ious_3d and len(all_gt_boxes_3d) > 0:
            gt_positions_3d = np.asarray(all_gt_boxes_3d)[:, 0:3]
            vtk_text_labels.set_text_labels(
                gt_positions_3d,
                ['{:0.3f}'.format(iou_3d) for iou_3d in max_ious_3d])

        # Create VtkGroundPlane
        vtk_ground_plane = VtkGroundPlane()
        vtk_slice_bot_plane = VtkGroundPlane()
        vtk_slice_top_plane = VtkGroundPlane()

        vtk_ground_plane.set_plane(ground_plane, bev_extents)
        vtk_slice_bot_plane.set_plane(ground_plane + [0, 0, 0, -0.2], bev_extents)
        vtk_slice_top_plane.set_plane(ground_plane + [0, 0, 0, -2.0], bev_extents)

        # Create Voxel Grid Renderer in bottom half
        vtk_renderer = vtk.vtkRenderer()
        vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
        vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)

        vtk_renderer.AddActor(vtk_proposal_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_prediction_boxes.vtk_actor)

        vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)

        vtk_renderer.AddActor(vtk_text_labels.vtk_actor)

        # Add ground plane and slice planes
        vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)
        vtk_renderer.AddActor(vtk_slice_bot_plane.vtk_actor)
        vtk_renderer.AddActor(vtk_slice_top_plane.vtk_actor)

        vtk_renderer.AddActor(vtk_axes)
        vtk_renderer.SetBackground(0.2, 0.3, 0.4)

        # Set initial properties for some actors
        vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(3)
        vtk_proposal_boxes.vtk_actor.SetVisibility(0)
        vtk_voxel_grid.vtk_actor.SetVisibility(0)
        vtk_all_gt_boxes.vtk_actor.SetVisibility(0)

        vtk_ground_plane.vtk_actor.SetVisibility(0)
        vtk_slice_bot_plane.vtk_actor.SetVisibility(0)
        vtk_slice_top_plane.vtk_actor.SetVisibility(0)
        vtk_ground_plane.vtk_actor.GetProperty().SetOpacity(0.9)
        vtk_slice_bot_plane.vtk_actor.GetProperty().SetOpacity(0.9)
        vtk_slice_top_plane.vtk_actor.GetProperty().SetOpacity(0.9)

        # Setup Camera
        current_cam = vtk_renderer.GetActiveCamera()
        current_cam.Pitch(140.0)
        current_cam.Roll(180.0)

        # Zooms out to fit all points on screen
        vtk_renderer.ResetCamera()
        # Zoom in slightly
        current_cam.Zoom(2)

        # Reset the clipping range to show all points
        vtk_renderer.ResetCameraClippingRange()

        # Setup Render Window
        vtk_render_window = vtk.vtkRenderWindow()
        vtk_render_window.SetWindowName(
            "Predictions: Step {}, Sample {}, Min Score {}".format(
                global_step,
                sample_name,
                avod_score_threshold,
            ))
        vtk_render_window.SetSize(900, 600)
        vtk_render_window.AddRenderer(vtk_renderer)

        # Setup custom interactor style, which handles mouse and key events
        vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
        vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

        # Add custom interactor to toggle actor visibilities
        custom_interactor = vis_utils.CameraInfoInteractorStyle([
            vtk_proposal_boxes.vtk_actor,
            vtk_prediction_boxes.vtk_actor,
            vtk_voxel_grid.vtk_actor,
            vtk_point_cloud.vtk_actor,

            vtk_easy_gt_boxes.vtk_actor,
            vtk_medium_gt_boxes.vtk_actor,
            vtk_hard_gt_boxes.vtk_actor,
            vtk_all_gt_boxes.vtk_actor,

            vtk_ground_plane.vtk_actor,
            vtk_slice_bot_plane.vtk_actor,
            vtk_slice_top_plane.vtk_actor,
            vtk_text_labels.vtk_actor,
        ])

        vtk_render_window_interactor.SetInteractorStyle(custom_interactor)
        # Render in VTK
        vtk_render_window.Render()

        # Take a screenshot
        window_to_image_filter = vtk.vtkWindowToImageFilter()
        window_to_image_filter.SetInput(vtk_render_window)
        window_to_image_filter.Update()

        png_writer = vtk.vtkPNGWriter()
        file_name = img_out_dir + "/{}.png".format(sample_name)
        png_writer.SetFileName(file_name)
        png_writer.SetInputData(window_to_image_filter.GetOutput())
        png_writer.Write()

        print('Screenshot saved to ', file_name)

        # vtk_render_window_interactor.Start()  # Blocking

        vtk_render_window_interactor.Initialize()  # Non-blocking
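The demos in this collection run as standalone scripts; the usual entry-point guard (assumed here, not part of the extracted snippet) is:

if __name__ == '__main__':
    main()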
Example #5
def main():
    """
    Displays the bird's eye view maps for a KITTI sample.
    """

    ##############################
    # Options
    ##############################

    bev_generator = 'slices'

    slices_config = \
        """
        slices {
            height_lo: -0.2
            height_hi: 2.3
            num_slices: 5
        }
        """

    # Use None for a random image
    img_idx = None
    # img_idx = 142
    # img_idx = 191

    show_ground_truth = True  # Whether to overlay ground_truth boxes

    point_cloud_source = 'lidar'
    ##############################
    # End of Options
    ##############################

    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)
    dataset_config = DatasetBuilder.merge_defaults(dataset_config)

    # Overwrite bev_generator
    if bev_generator == 'slices':
        text_format.Merge(slices_config,
                          dataset_config.kitti_utils_config.bev_generator)
    else:
        raise ValueError('Invalid bev_generator')

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    if img_idx is None:
        img_idx = int(random.random() * dataset.num_samples)

    sample_name = "{:06}".format(img_idx)
    print('=== Showing BEV maps for image: {}.png ==='.format(sample_name))

    # Load image
    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = image.shape[0:2]

    kitti_utils = dataset.kitti_utils
    point_cloud = kitti_utils.get_point_cloud(point_cloud_source, img_idx,
                                              image_shape)
    ground_plane = kitti_utils.get_ground_plane(sample_name)
    bev_images = kitti_utils.create_bev_maps(point_cloud, ground_plane)

    height_maps = np.array(bev_images.get("height_maps"))
    density_map = np.array(bev_images.get("density_map"))

    box_points, box_points_norm = None, None
    if show_ground_truth:
        # Get projected boxes
        obj_labels = obj_utils.read_labels(dataset.label_dir, img_idx)

        filtered_objs = obj_labels

        label_boxes = []
        for label in filtered_objs:
            box = box_3d_encoder.object_label_to_box_3d(label)
            label_boxes.append(box)

        label_boxes = np.array(label_boxes)
        box_points, box_points_norm = box_3d_projector.project_to_bev(
            label_boxes, [[-40, 40], [0, 70]])

    rgb_img_size = (np.array((1242, 375)) * 0.75).astype(np.int16)
    img_x_start = 60
    img_y_start = 330

    img_x = img_x_start
    img_y = img_y_start
    img_w = 400
    img_h = 350
    img_titlebar_h = 20

    # Show images
    vis_utils.cv2_show_image("Image",
                             image,
                             size_wh=rgb_img_size,
                             location_xy=(img_x, 0))

    # Height maps
    for map_idx in range(len(height_maps)):
        height_map = height_maps[map_idx]

        height_map = draw_boxes(height_map, box_points_norm)
        vis_utils.cv2_show_image("Height Map {}".format(map_idx),
                                 height_map,
                                 size_wh=(img_w, img_h),
                                 location_xy=(img_x, img_y))

        img_x += img_w
        # Wrap around
        if (img_x + img_w) > 1920:
            img_x = img_x_start
            img_y += img_h + img_titlebar_h

    # Density map
    density_map = draw_boxes(density_map, box_points_norm)
    vis_utils.cv2_show_image("Density Map",
                             density_map,
                             size_wh=(img_w, img_h),
                             location_xy=(img_x, img_y))

    cv2.waitKey()
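The BEV input used by the load_samples methods follows the same convention: the height maps and the density map are depth-stacked into one tensor. A toy shape check (map dimensions hypothetical):

import numpy as np

height_maps = [np.zeros((700, 800)) for _ in range(5)]  # one map per height slice
density_map = np.zeros((700, 800))
bev_input = np.dstack((*height_maps, density_map))
print(bev_input.shape)  # (700, 800, 6)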
    def load_samples(self,
                     indices,
                     sin_type=None,
                     sin_level=None,
                     sin_input_name=None,
                     gen_all_sin_inputs=False,
                     list_mask_2d=None):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded
            sin_type: type of single input noise to apply (e.g. 'lowres')
            sin_level: severity level of the noise
            sin_input_name: name of the single input to degrade (e.g. 'lidar')
            gen_all_sin_inputs: if True, apply the noise to all inputs
            list_mask_2d: optional list of 2D masks, one per sample index

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for idx, sample_idx in enumerate(indices):
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            if list_mask_2d:
                mask_2d = list_mask_2d[idx]
            else:
                mask_2d = None

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

            # Read lidar with subsampling (handled before other preprocessing)
            if sin_type == 'lowres' and (sin_input_name == 'lidar'
                                         or gen_all_sin_inputs):
                stride_sub = get_stride_sub(sin_level)
                point_cloud = get_point_cloud_sub(img_idx, self.calib_dir,
                                                  self.velo_dir, image_shape,
                                                  stride_sub)
            else:
                point_cloud = self.kitti_utils.get_point_cloud(
                    self.bev_source, img_idx, image_shape)

            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                if obj_labels is not None:
                    obj_labels = [
                        kitti_aug.flip_label_in_3d_only(obj)
                        for obj in obj_labels
                    ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :,
                            0:3] = kitti_aug.apply_pca_jitter(image_input[:, :,
                                                                          0:3])

            # Add Single Input Noise
            if (sin_input_name in SINFields.SIN_INPUT_NAMES) and (
                    sin_type in SINFields.VALID_SIN_TYPES):
                image_input, point_cloud = genSINtoInputs(
                    image_input,
                    point_cloud,
                    sin_type=sin_type,
                    sin_level=sin_level,
                    sin_input_name=sin_input_name,
                    mask_2d=mask_2d,
                    frame_calib_p2=stereo_calib_p2)
            # Add Input Noise to all
            if gen_all_sin_inputs:
                image_input, point_cloud = genSINtoAllInputs(
                    image_input,
                    point_cloud,
                    sin_type=sin_type,
                    sin_level=sin_level,
                    mask_2d=mask_2d,
                    frame_calib_p2=stereo_calib_p2)

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
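A hedged usage sketch for this single-input-noise variant (the sin_level value is hypothetical; 'lowres' and 'lidar' are the names used in the code above):

sample_dicts = dataset.load_samples([0],
                                    sin_type='lowres',
                                    sin_level=1,
                                    sin_input_name='lidar')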
    def _calculate_anchors_info(self, all_anchor_boxes_3d, empty_anchor_filter,
                                gt_labels):
        """Calculates the list of anchor information in the format:
            N x 8 [max_gt_2d_iou, max_gt_3d_iou, (6 x offsets), class_index]
                max_gt_iou - highest 2D or 3D iou with any ground truth box
                offsets - encoded offsets [dx, dy, dz, d_dimx, d_dimy, d_dimz]
                class_index - the anchor's class as an index
                    (e.g. 0 or 1, for "Background" or "Car")

        Args:
            all_anchor_boxes_3d: list of anchors in box_3d format
                N x [x, y, z, l, w, h, ry]
            empty_anchor_filter: boolean mask of which anchors are non empty
            gt_labels: list of Object Label data format containing ground truth
                labels to generate positives/negatives from.

        Returns:
            list of anchor info
        """
        # Check for ground truth objects
        if len(gt_labels) == 0:
            raise Warning("No valid ground truth label to generate anchors.")

        kitti_utils = self._dataset.kitti_utils

        # Filter empty anchors
        anchor_indices = np.where(empty_anchor_filter)[0]
        anchor_boxes_3d = all_anchor_boxes_3d[empty_anchor_filter]

        # Convert anchor_boxes_3d to anchor format
        anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)

        # Convert gt to boxes_3d -> anchors -> iou format
        gt_boxes_3d = np.asarray([
            box_3d_encoder.object_label_to_box_3d(gt_obj)
            for gt_obj in gt_labels
        ])
        gt_anchors = box_3d_encoder.box_3d_to_anchor(gt_boxes_3d,
                                                     ortho_rotate=True)

        rpn_iou_type = self.mini_batch_utils.rpn_iou_type
        if rpn_iou_type == '2d':
            # Convert anchors to 2d iou format
            anchors_for_2d_iou, _ = np.asarray(
                anchor_projector.project_to_bev(anchors,
                                                kitti_utils.bev_extents))

            gt_boxes_for_2d_iou, _ = anchor_projector.project_to_bev(
                gt_anchors, kitti_utils.bev_extents)

        elif rpn_iou_type == '3d':
            # Convert anchors to 3d iou format for calculation
            anchors_for_3d_iou = box_3d_encoder.box_3d_to_3d_iou_format(
                anchor_boxes_3d)

            gt_boxes_for_3d_iou = \
                box_3d_encoder.box_3d_to_3d_iou_format(gt_boxes_3d)
        else:
            raise ValueError('Invalid rpn_iou_type {}'.format(rpn_iou_type))

        # Initialize sample and offset lists
        num_anchors = len(anchor_boxes_3d)
        all_info = np.zeros((num_anchors, self.mini_batch_utils.col_length))

        # Update anchor indices
        all_info[:, self.mini_batch_utils.col_anchor_indices] = anchor_indices

        # For each of the labels, generate samples
        for gt_idx in range(len(gt_labels)):

            gt_obj = gt_labels[gt_idx]
            gt_box_3d = gt_boxes_3d[gt_idx]

            # Get 2D or 3D IoU for every anchor
            if self.mini_batch_utils.rpn_iou_type == '2d':
                gt_box_for_2d_iou = gt_boxes_for_2d_iou[gt_idx]
                ious = evaluation.two_d_iou(gt_box_for_2d_iou,
                                            anchors_for_2d_iou)
            elif self.mini_batch_utils.rpn_iou_type == '3d':
                gt_box_for_3d_iou = gt_boxes_for_3d_iou[gt_idx]
                ious = evaluation.three_d_iou(gt_box_for_3d_iou,
                                              anchors_for_3d_iou)

            # Only update indices with a higher iou than before
            update_indices = np.greater(
                ious, all_info[:, self.mini_batch_utils.col_ious])

            # Get ious to update
            ious_to_update = ious[update_indices]

            # Calculate offsets, use 3D iou to get highest iou
            anchors_to_update = anchors[update_indices]
            gt_anchor = box_3d_encoder.box_3d_to_anchor(gt_box_3d,
                                                        ortho_rotate=True)
            offsets = anchor_encoder.anchor_to_offset(anchors_to_update,
                                                      gt_anchor)

            # Convert gt type to index
            class_idx = kitti_utils.class_str_to_index(gt_obj.type)

            # Update anchors info (indices already updated)
            # [index, iou, (offsets), class_index]
            all_info[update_indices,
                     self.mini_batch_utils.col_ious] = ious_to_update

            all_info[update_indices, self.mini_batch_utils.col_offsets_lo:self.
                     mini_batch_utils.col_offsets_hi] = offsets
            all_info[update_indices,
                     self.mini_batch_utils.col_class_idx] = class_idx

        return all_info
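The dummy anchors with unit dimensions in load_samples exist because the offset encoding takes the log of a dimension ratio, which breaks on zeros. A minimal sketch of that encoding (assumed to mirror anchor_encoder.anchor_to_offset, not copied from the library):

import numpy as np

def anchor_to_offset_sketch(anchors, gt_anchor):
    """anchors: (N, 6) [x, y, z, dim_x, dim_y, dim_z]; gt_anchor: (6,)."""
    t_xyz = (gt_anchor[:3] - anchors[:, :3]) / anchors[:, 3:]  # scaled centre offsets
    t_dim = np.log(gt_anchor[3:] / anchors[:, 3:])             # log dimension ratios
    return np.hstack([t_xyz, t_dim])

# Unit dimensions keep both the division and the log well defined
print(anchor_to_offset_sketch(
    np.array([[-1000., -1000., -1000., 1., 1., 1.]]),
    np.array([0., 0., 0., 1., 1., 1.])))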
Example #8
def main():
    """This demo runs through all samples in the trainval set, and checks
    that the 3D box projection of all 'Car', 'Van', 'Pedestrian', and 'Cyclist'
    objects are in the correct flipped 2D location after applying
    modifications to the stereo p2 matrix.
    """

    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL,
                                                 use_defaults=True)

    np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

    all_samples = dataset.sample_names

    all_pixel_errors = []
    all_max_pixel_errors = []

    total_flip_time = 0.0

    for sample_idx in range(dataset.num_samples):

        sys.stdout.write('\r{} / {}'.format(sample_idx,
                                            dataset.num_samples - 1))

        sample_name = all_samples[sample_idx]

        img_idx = int(sample_name)

        # Run the main loop to run throughout the images
        frame_calibration_info = calib_utils.read_calibration(
            dataset.calib_dir,
            img_idx)

        # Load labels
        gt_labels = obj_utils.read_labels(dataset.label_dir, img_idx)
        gt_labels = dataset.kitti_utils.filter_labels(
            gt_labels, ['Car', 'Van', 'Pedestrian', 'Cyclist'])

        image = cv2.imread(dataset.get_rgb_image_path(sample_name))
        image_size = [image.shape[1], image.shape[0]]

        # Flip p2 matrix
        calib_p2 = frame_calibration_info.p2
        flipped_p2 = np.copy(calib_p2)
        flipped_p2[0, 2] = image.shape[1] - flipped_p2[0, 2]
        flipped_p2[0, 3] = -flipped_p2[0, 3]
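
        # Why this works: horizontally mirroring the image maps u -> W - u;
        # with the scene mirrored as well (X -> -X), the pinhole projection
        # matches if the principal point is reflected (c_u -> W - c_u) and
        # the baseline term p2[0, 3] is negated.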

        for obj_idx in range(len(gt_labels)):

            obj = gt_labels[obj_idx]

            # Get original 2D bounding boxes
            orig_box_3d = box_3d_encoder.object_label_to_box_3d(obj)
            orig_bbox_2d = box_3d_projector.project_to_image_space(
                orig_box_3d, calib_p2, truncate=True, image_size=image_size)

            # Skip boxes outside image
            if orig_bbox_2d is None:
                continue

            orig_bbox_2d_flipped = flip_box_2d(orig_bbox_2d, image_size)

            # Do flipping
            start_time = time.time()
            flipped_obj = kitti_aug.flip_label_in_3d_only(obj)
            flip_time = time.time() - start_time
            total_flip_time += flip_time

            box_3d_flipped = box_3d_encoder.object_label_to_box_3d(flipped_obj)
            new_bbox_2d_flipped = box_3d_projector.project_to_image_space(
                box_3d_flipped, flipped_p2, truncate=True,
                image_size=image_size)

            pixel_errors = new_bbox_2d_flipped - orig_bbox_2d_flipped
            max_pixel_error = np.amax(np.abs(pixel_errors))

            all_pixel_errors.append(pixel_errors)
            all_max_pixel_errors.append(max_pixel_error)

            if max_pixel_error > 5:
                print(' Error > 5px', sample_idx, max_pixel_error)
                print(np.round(orig_bbox_2d_flipped, 3),
                      np.round(new_bbox_2d_flipped, 3))

    print('Avg flip time:', total_flip_time / dataset.num_samples)

    # Convert to ndarrays
    all_pixel_errors = np.asarray(all_pixel_errors)
    all_max_pixel_errors = np.asarray(all_max_pixel_errors)

    # Print max values
    print(np.amax(all_max_pixel_errors))

    # Plot pixel errors
    fig, axes = plt.subplots(nrows=3, ncols=1)
    ax0, ax1, ax2 = axes.flatten()

    ax0.hist(all_pixel_errors[:, 0], 50, histtype='bar', facecolor='green')
    ax1.hist(all_pixel_errors[:, 2], 50, histtype='bar', facecolor='green')
    ax2.hist(all_max_pixel_errors, 50, histtype='bar', facecolor='green')

    plt.show()