Example #1
    def create_sliced_voxel_grid_2d(self,
                                    sample_name,
                                    source,
                                    image_shape=None):
        """Generates a filtered 2D voxel grid from point cloud data

        Args:
            sample_name: name of the sample to generate the point cloud from
            source: point cloud source - 'stereo', 'lidar', or 'depth'
            image_shape: image dimensions [h, w], only required when
                source is 'lidar' or 'depth'

        Returns:
            voxel_grid_2d: 2D voxel grid created from the filtered point cloud
        """
        img_idx = int(sample_name)
        ground_plane = obj_utils.get_road_plane(img_idx,
                                                self.dataset.planes_dir)

        point_cloud = self.get_point_cloud(source,
                                           img_idx,
                                           image_shape=image_shape)
        filtered_points = self._apply_slice_filter(point_cloud, ground_plane)
        # Create Voxel Grid
        voxel_grid_2d = VoxelGrid2D()
        voxel_grid_2d.voxelize_2d(filtered_points,
                                  self.voxel_size,
                                  extents=self.area_extents,
                                  ground_plane=ground_plane,
                                  create_leaf_layout=True)

        return voxel_grid_2d
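
The returned grid is typically consumed by the empty-anchor filtering step shown in Example #6. A minimal calling sketch, assuming a KittiUtils-like object is available as kitti_utils; the object name, sample name, and image shape are placeholders, not values from the source:

# Hypothetical call; object name, sample name, and image shape are placeholders
voxel_grid_2d = kitti_utils.create_sliced_voxel_grid_2d('000169',
                                                        source='lidar',
                                                        image_shape=[375, 1242])
# The resulting grid encodes occupied cells as 0 and empty cells as -1 in its
# leaf layout (see the comments in Example #3), and is later used to filter
# out empty anchors (see Example #6)
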
Example #2
    def get_ground_plane(self, sample_name):
        """Reads the ground plane for the sample

        Args:
            sample_name: name of the sample, e.g. '000123'

        Returns:
            ground_plane: ground plane coefficients
        """
        ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                self.dataset.planes_dir)
        return ground_plane
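
The returned coefficients [a, b, c, d] describe a plane a*x + b*y + c*z + d = 0 in the camera frame. A minimal sketch of using them to measure how far points lie from the ground plane; the example values come from the test in Example #10, and the sign convention depends on which way the stored normal points, so treat it as an assumption:

import numpy as np

# Plane coefficients in the format returned above (values from Example #10)
ground_plane = np.array([-7.051729e-03, -9.997791e-01,
                         -1.980151e-02, 1.680367e+00])
points = np.array([[0.0, 1.65, 10.0],    # roughly on the road surface
                   [0.0, 0.00, 10.0]])   # roughly at camera height

normal, d = ground_plane[:3], ground_plane[3]
# Signed distance of each point to the plane a*x + b*y + c*z + d = 0
distances = (points @ normal + d) / np.linalg.norm(normal)
print(distances)
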
Example #3
    def create_sliced_voxel_grid_2d(self,
                                    sample_name,
                                    source,
                                    image_shape=None):
        """Generates a filtered 2D voxel grid from point cloud data

        Args:
            sample_name: name of the sample to generate the point cloud from
            source: point cloud source, e.g. 'lidar'
            image_shape: image dimensions [h, w], only required when
                source is 'lidar' or 'depth'

        Returns:
            voxel_grid_2d: 2D voxel grid created from the filtered point cloud
        """
        img_idx = int(sample_name)
        ground_plane = obj_utils.get_road_plane(img_idx,
                                                self.dataset.planes_dir)

        # Point cloud -> camera -> pixel coordinates -> image (keep only the
        # points whose projections fall inside the image)
        # Returns the 3D coordinates of those points in the camera frame
        point_cloud = self.get_point_cloud(source,
                                           img_idx,
                                           image_shape=image_shape)
        # Keep points inside the x, y, z extents whose height above the
        # ground plane is within [height_lo, height_hi]
        filtered_points = self._apply_slice_filter(point_cloud, ground_plane)

        # Project point_cloud and filtered_points onto the image and display them
        if img_idx == 23:
            self._project_and_show(sample_name, point_cloud, "red",
                                   "point_cloud")
            self._project_and_show(sample_name, filtered_points.T, "red",
                                   "filtered_points")

        # Create Voxel Grid
        # Discretize the point cloud into voxels: divide the space into a grid
        # of voxel_size cells, count the points in each cell, and keep a single
        # voxel coordinate (voxel_coords) per occupied cell
        # Build the leaf layout: cells that contain points are marked 0,
        # empty cells are marked -1
        voxel_grid_2d = VoxelGrid2D()
        voxel_grid_2d.voxelize_2d(filtered_points,
                                  self.voxel_size,
                                  extents=self.area_extents,
                                  ground_plane=ground_plane,
                                  create_leaf_layout=True)

        return voxel_grid_2d
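
The _apply_slice_filter call above keeps points that fall inside the area extents and whose height above the ground plane lies within a slice. A minimal sketch of that idea under assumed argument layouts (N x 3 points, 3 x 2 extents); it is an illustration, not the project's implementation:

import numpy as np

def slice_filter(points, ground_plane, extents, height_lo, height_hi):
    """points: N x 3 camera-frame points, ground_plane: [a, b, c, d],
    extents: [[x_min, x_max], [y_min, y_max], [z_min, z_max]]."""
    extents = np.asarray(extents)
    normal = np.asarray(ground_plane[:3])
    # Height above the plane; the sign convention is an assumption
    heights = -(points @ normal + ground_plane[3]) / np.linalg.norm(normal)

    in_extents = np.all((points >= extents[:, 0]) &
                        (points <= extents[:, 1]), axis=1)
    in_slice = (heights >= height_lo) & (heights <= height_hi)
    return points[in_extents & in_slice]
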
Example #4
    def load_pred_sample(self, rgb_image, point_cloud, frame_calib):
        """ This method is used for on-line prediction in the Avod model. 
            It tries to mimic the effects of load samples, but instead of loading from an index, it takes and image and lidar point cloud as arguments.
            Returns one sample_dict
        """
        obj_labels = None

        anchors_info = []

        label_anchors = np.zeros((1, 6))
        label_boxes_3d = np.zeros((1, 7))
        label_classes = np.zeros(1)

        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        # TODO: This should be calculated once IMU input from the vehicle is
        # available. For now, just use the KITTI road plane of sample index 0
        ground_plane = obj_utils.get_road_plane(0, self.planes_dir)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(point_cloud,
                                                      ground_plane)

        slice_maps = bev_images.get('slice_maps')
        cloud_maps = bev_images.get('cloud_maps')
        bev_maps = slice_maps + cloud_maps

        bev_input = np.dstack(bev_maps)

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,
            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            constants.KEY_ANCHORS_INFO: anchors_info,
            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: frame_calib.p2,
            constants.KEY_SAMPLE_NAME:
            0,  # TODO: Find out how this is used later
            constants.KEY_SAMPLE_AUGS:
            []  # We don't need any augs for prediction
        }
        return sample_dict
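
A minimal sketch of calling load_pred_sample for on-line prediction, assuming the owning dataset object is available as dataset and that cv2, numpy, calib_utils, and constants are imported as in the surrounding examples; the file paths, calib_dir, and the 3 x N point-cloud layout are placeholders:

# Hypothetical inputs; paths and the point-cloud layout are placeholders
calib_dir = '/path/to/calib'
bgr_image = cv2.imread('/path/to/image_2/000000.png')
rgb_image = bgr_image[..., ::-1]                   # BGR -> RGB
point_cloud = np.load('/path/to/points.npy')       # assumed 3 x N, camera frame
frame_calib = calib_utils.read_calibration(calib_dir, 0)

sample_dict = dataset.load_pred_sample(rgb_image, point_cloud, frame_calib)
bev_input = sample_dict[constants.KEY_BEV_INPUT]
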
Example #5
def main():
    """
     Visualization of the mini batch anchors for RpnModel training.

     Keys:
        F1: Toggle mini batch anchors
        F2: Toggle positive/negative proposal anchors
        F3: Toggle easy ground truth objects (Green)
        F4: Toggle medium ground truth objects (Orange)
        F5: Toggle hard ground truth objects (Red)
        F6: Toggle all ground truth objects (default off)
        F7: Toggle ground-plane
     """

    anchor_colour_scheme = {
        "Car": (255, 0, 0),             # Red
        "Pedestrian": (255, 150, 50),   # Orange
        "Cyclist": (150, 50, 100),      # Purple
        "DontCare": (255, 255, 255),    # White

        "Anchor": (150, 150, 150),      # Gray

        "Positive": (0, 255, 255),      # Teal
        "Negative": (255, 0, 255)       # Bright Purple
    }

    ##############################
    # Options
    ##############################
    show_orientations = True

    # Class configuration name
    config_name = 'car'
    # config_name = 'ped'
    # config_name = 'cyc'
    # config_name = 'ppl'

    # # # Random sample # # #
    sample_name = None

    # Small cars
    # sample_name = '000008'
    # sample_name = '000639'

    # # # Cars # # #
    # sample_name = "000001"
    # sample_name = "000050"
    # sample_name = "000112"
    # sample_name = "000169"
    # sample_name = "000191"

    # # # People # # #
    # sample_name = '000000'

    # val_half
    # sample_name = '000001'  # Hard, 1 far cyc
    # sample_name = '000005'  # Easy, 1 ped
    # sample_name = '000122'  # Easy, 1 cyc
    # sample_name = '000134'  # Hard, lots of people
    # sample_name = '000167'  # Medium, 1 ped, 2 cycs
    # sample_name = '000187'  # Medium, 1 ped on left
    # sample_name = '000381'  # Easy, 1 ped
    # sample_name = '000398'  # Easy, 1 ped
    # sample_name = '000401'  # Hard, obscured peds
    # sample_name = '000407'  # Easy, 1 ped
    sample_name = '000448'  # Hard, several far people
    # sample_name = '000486'  # Hard 2 obscured peds
    # sample_name = '000509'  # Easy, 1 ped
    # sample_name = '000718'  # Hard, lots of people
    # sample_name = '002216'  # Easy, 1 cyc

    # sample_name = "000000"
    # sample_name = "000011"
    # sample_name = "000015"
    # sample_name = "000028"
    # sample_name = "000035"
    # sample_name = "000134"
    # sample_name = "000167"
    # sample_name = '000379'
    # sample_name = '000381'
    # sample_name = '000397'
    # sample_name = '000398'
    # sample_name = '000401'
    # sample_name = '000407'
    # sample_name = '000486'
    # sample_name = '000509'

    # # Cyclists # # #
    # sample_name = '000122'
    # sample_name = '000448'

    # # # Multiple classes # # #
    # sample_name = "000764"
    ##############################
    # End of Options
    ##############################

    # Dataset config
    dataset_config_path = mlod.top_dir() + \
        '/demos/configs/mb_rpn_{}.config'.format(config_name)

    # Create Dataset
    dataset = DatasetBuilder.load_dataset_from_config(
        dataset_config_path)

    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        sample_name = dataset.sample_list[sample_idx].name

    anchor_strides = dataset.kitti_utils.anchor_strides

    img_idx = int(sample_name)

    print("Showing mini batch for sample {}".format(sample_name))

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # KittiUtils class
    dataset_utils = dataset.kitti_utils

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)

    points = point_cloud.T
    point_colours = vis_utils.project_img_to_point_cloud(points, image,
                                                         dataset.calib_dir,
                                                         img_idx)

    clusters, _ = dataset.get_cluster_info()
    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Read mini batch info
    anchors_info = dataset_utils.get_anchors_info(
        dataset.classes_name, anchor_strides, sample_name)

    if not anchors_info:
        # Exit early if anchors_info is empty
        print("Anchors info is empty, please try a different sample")
        return

    # Generate anchors for all classes
    all_anchor_boxes_3d = []
    for class_idx in range(len(dataset.classes)):

        anchor_boxes_3d = anchor_generator.generate(
            area_3d=dataset.kitti_utils.area_extents,
            anchor_3d_sizes=clusters[class_idx],
            anchor_stride=anchor_strides[class_idx],
            ground_plane=ground_plane)

        all_anchor_boxes_3d.extend(anchor_boxes_3d)
    all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)

    # Use anchors info
    indices, ious, offsets, classes = anchors_info

    # Get non empty anchors from the indices
    anchor_boxes_3d = all_anchor_boxes_3d[indices]

    # Sample an RPN mini batch from the non empty anchors
    mini_batch_utils = dataset.kitti_utils.mini_batch_utils
    mb_mask_tf, _ = mini_batch_utils.sample_rpn_mini_batch(ious)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    mb_mask = sess.run(mb_mask_tf)

    mb_anchor_boxes_3d = anchor_boxes_3d[mb_mask]
    mb_anchor_ious = ious[mb_mask]

    # ObjectLabel list that holds all boxes to visualize
    obj_list = []

    num_positives = 0
    # Convert the mini_batch anchors to object list
    mini_batch_size = mini_batch_utils.rpn_mini_batch_size
    for i in range(mini_batch_size):
        if mb_anchor_ious[i] > mini_batch_utils.rpn_pos_iou_range[0]:
            obj_type = "Positive"
            num_positives += 1
        else:
            obj_type = "Negative"

        obj = box_3d_encoder.box_3d_to_object_label(mb_anchor_boxes_3d[i],
                                                    obj_type)
        obj_list.append(obj)

    print('Num positives', num_positives)

    # Convert all non-empty anchors to object list
    non_empty_anchor_objs = \
        [box_3d_encoder.box_3d_to_object_label(
            anchor_box_3d, obj_type='Anchor')
         for anchor_box_3d in anchor_boxes_3d]

    ##############################
    # Ground Truth
    ##############################
    if dataset.has_labels:
        easy_gt_objs, medium_gt_objs, \
            hard_gt_objs, all_gt_objs = demo_utils.get_gts_based_on_difficulty(
                dataset, img_idx)
    else:
        easy_gt_objs = medium_gt_objs = hard_gt_objs = all_gt_objs = []

    # Visualize 2D image
    vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for mini batch anchors
    vtk_pos_anchor_boxes = VtkBoxes()
    vtk_pos_anchor_boxes.set_objects(obj_list, anchor_colour_scheme)

    # VtkBoxes for non empty anchors
    vtk_non_empty_anchors = VtkBoxes()
    vtk_non_empty_anchors.set_objects(non_empty_anchor_objs,
                                      anchor_colour_scheme)
    vtk_non_empty_anchors.set_line_width(0.1)

    # Create VtkBoxes for ground truth
    vtk_easy_gt_boxes, vtk_medium_gt_boxes, \
        vtk_hard_gt_boxes, vtk_all_gt_boxes = \
        demo_utils.create_gt_vtk_boxes(easy_gt_objs,
                                       medium_gt_objs,
                                       hard_gt_objs,
                                       all_gt_objs,
                                       show_orientations)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)
    vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(2)

    vtk_ground_plane = VtkGroundPlane()
    vtk_ground_plane.set_plane(ground_plane, dataset.kitti_utils.bev_extents)

    # vtk_voxel_grid = VtkVoxelGrid()
    # vtk_voxel_grid.set_voxels(vx_grid)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)

    vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)

    # vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(vtk_non_empty_anchors.vtk_actor)
    vtk_renderer.AddActor(vtk_pos_anchor_boxes.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    mb_iou_thresholds = np.round(
        [mini_batch_utils.rpn_neg_iou_range[1],
         mini_batch_utils.rpn_pos_iou_range[0]], 3)
    vtk_render_window.SetWindowName(
        'Sample {} RPN Mini Batch {}/{}, '
        'Num Positives {}'.format(
            sample_name,
            mb_iou_thresholds[0],
            mb_iou_thresholds[1],
            num_positives))
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_non_empty_anchors.vtk_actor,
            vtk_pos_anchor_boxes.vtk_actor,

            vtk_easy_gt_boxes.vtk_actor,
            vtk_medium_gt_boxes.vtk_actor,
            vtk_hard_gt_boxes.vtk_actor,
            vtk_all_gt_boxes.vtk_actor,

            vtk_ground_plane.vtk_actor
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
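
The positive/negative split in the loop above follows from comparing each sampled anchor's IoU against the RPN IoU ranges. A tiny standalone sketch of that rule with made-up thresholds and IoUs; the real ranges come from mini_batch_utils and the RPN config:

import numpy as np

# Made-up thresholds and IoUs; the real values come from the RPN config
rpn_pos_iou_range = [0.5, 1.0]
ious = np.array([0.05, 0.25, 0.45, 0.62, 0.80])

# Anchors above the positive lower bound are positives, the rest negatives,
# mirroring the loop over the mini batch above
obj_types = np.where(ious > rpn_pos_iou_range[0], 'Positive', 'Negative')
num_positives = int(np.sum(ious > rpn_pos_iou_range[0]))
print(obj_types, 'Num positives', num_positives)
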
Example #6
    def preprocess(self, indices):
        """Preprocesses anchor info and saves info to files

        Args:
            indices (int array): sample indices to process.
                If None, processes all samples
        """
        # Get anchor stride for class
        anchor_strides = self._anchor_strides

        dataset = self._dataset
        dataset_utils = self._dataset.kitti_utils
        classes_name = dataset.classes_name

        # Make folder if it doesn't exist yet
        output_dir = self.mini_batch_utils.get_file_path(classes_name,
                                                         anchor_strides,
                                                         sample_name=None)
        os.makedirs(output_dir, exist_ok=True)

        # Get clusters for class
        all_clusters_sizes, _ = dataset.get_cluster_info()

        anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

        # Load indices of data_split
        all_samples = dataset.sample_list

        if indices is None:
            indices = np.arange(len(all_samples))
        num_samples = len(indices)

        # For each image in the dataset, save info on the anchors
        for sample_idx in indices:
            # Get image name for given cluster
            sample_name = all_samples[sample_idx].name
            img_idx = int(sample_name)

            # Check for existing files and skip to the next
            if self._check_for_existing(classes_name, anchor_strides,
                                        sample_name):
                print("{} / {}: Sample already preprocessed".format(
                    sample_idx + 1, num_samples, sample_name))
                continue

            # Get ground truth and filter based on difficulty
            ground_truth_list = obj_utils.read_labels(dataset.label_dir,
                                                      img_idx)

            # If no valid ground truth, skip this image
            if not ground_truth_list:
                print("{} / {} No {}s for sample {} "
                      "(Ground Truth Filter)".format(sample_idx + 1,
                                                     num_samples, classes_name,
                                                     sample_name))

                # Output an empty file and move on to the next image.
                self._save_to_file(classes_name, anchor_strides, sample_name)
                continue

            # Filter objects to dataset classes
            filtered_gt_list = dataset_utils.filter_labels(ground_truth_list)
            filtered_gt_list = np.asarray(filtered_gt_list)

            # If filtering by class leaves no valid ground truth, skip this image
            if len(filtered_gt_list) == 0:
                print("{} / {} No {}s for sample {} "
                      "(Class Filter)".format(sample_idx + 1,
                                              num_samples, classes_name,
                                              sample_name))

                # Output an empty file and move on to the next image.
                self._save_to_file(classes_name, anchor_strides, sample_name)
                continue

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(img_idx,
                                                    dataset.planes_dir)

            image = Image.open(dataset.get_rgb_image_path(sample_name))
            image_shape = [image.size[1], image.size[0]]

            # Generate sliced 2D voxel grid for filtering
            vx_grid_2d = dataset_utils.create_sliced_voxel_grid_2d(
                sample_name,
                source=dataset.bev_source,
                image_shape=image_shape)

            # List for merging all anchors
            all_anchor_boxes_3d = []

            # Create anchors for each class
            for class_idx in range(len(dataset.classes)):
                # Generate anchors for all classes
                grid_anchor_boxes_3d = anchor_generator.generate(
                    area_3d=self._area_extents,
                    anchor_3d_sizes=all_clusters_sizes[class_idx],
                    anchor_stride=self._anchor_strides[class_idx],
                    ground_plane=ground_plane)

                all_anchor_boxes_3d.extend(grid_anchor_boxes_3d)

            # Filter empty anchors
            all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
            anchors = box_3d_encoder.box_3d_to_anchor(all_anchor_boxes_3d)
            empty_anchor_filter = anchor_filter.get_empty_anchor_filter_2d(
                anchors, vx_grid_2d, self._density_threshold)

            # Calculate anchor info
            anchors_info = self._calculate_anchors_info(
                all_anchor_boxes_3d, empty_anchor_filter, filtered_gt_list)

            anchor_ious = anchors_info[:, self.mini_batch_utils.col_ious]

            valid_iou_indices = np.where(anchor_ious > 0.0)[0]

            print("{} / {}:"
                  "{:>6} anchors, "
                  "{:>6} iou > 0.0, "
                  "for {:>3} {}(s) for sample {}".format(
                      sample_idx + 1, num_samples, len(anchors_info),
                      len(valid_iou_indices), len(filtered_gt_list),
                      classes_name, sample_name))

            # Save anchors info
            self._save_to_file(classes_name, anchor_strides, sample_name,
                               anchors_info)
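
get_empty_anchor_filter_2d above keeps only anchors whose bird's-eye footprint in the 2D voxel grid contains enough occupied voxels. A minimal sketch of that density test over a plain occupancy grid; the footprint indexing and grid layout are assumptions, not the project's implementation:

import numpy as np

def density_filter(anchor_footprints, occupancy_grid, density_threshold=1):
    """anchor_footprints: list of (x_min, x_max, z_min, z_max) voxel index
    ranges, one per anchor; occupancy_grid: 2D array, 1 for occupied voxels."""
    keep = []
    for x_min, x_max, z_min, z_max in anchor_footprints:
        # Count occupied voxels inside this anchor's bird's-eye footprint
        num_occupied = occupancy_grid[x_min:x_max, z_min:z_max].sum()
        keep.append(num_occupied >= density_threshold)
    return np.asarray(keep)
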
Example #7
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Load MRCNN mask and features
            # print('Load MRCNN mask and features')
            mrcnn_result = self.kitti_utils.get_mrcnn_result(img_idx)
            # If no pedestrians are detected in the image, return early
            if not mrcnn_result:
                print('+++++++++++++ No mrcnn_result. load_samples, early end ++++++++++++++++')
                return []

            image_mrcnn_feature_input = mrcnn_result.item().get('features')
            image_mrcnn_bbox_input = mrcnn_result.item().get('rois')
            # rois: [batch, N, (y1, x1, y2, x2)] detection bounding boxes
            image_mask_input = mrcnn_result.item().get('masks')

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(self.calib_dir,
                                                           int(sample_name)).p2

            point_cloud = self.kitti_utils.get_point_cloud(self.bev_source,
                                                           img_idx,
                                                           image_shape)

            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                obj_labels = [kitti_aug.flip_label_in_3d_only(obj)
                              for obj in obj_labels]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray(
                    [box_3d_encoder.object_label_to_box_3d(obj_label)
                     for obj_label in obj_labels])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Use empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))
            # print('bev_input.shape = ', bev_input.shape)

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,

                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_IMAGE_MASK_INPUT: image_mask_input,
                constants.KEY_IMAGE_MRCNN_FEATURE_INPUT: image_mrcnn_feature_input,
                constants.KEY_IMAGE_MRCNN_BBOX_INPUT: image_mrcnn_bbox_input,

                constants.KEY_ANCHORS_INFO: anchors_info,

                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,

                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
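
The BEV input above stacks the height-map slices and the density map along the channel axis. A tiny shape sketch with made-up dimensions; the actual number of slices and the map resolution come from the BEV generator config:

import numpy as np

# Made-up resolution and slice count for illustration
height_maps = [np.zeros((700, 800)) for _ in range(5)]
density_map = np.zeros((700, 800))

bev_input = np.dstack((*height_maps, density_map))
print(bev_input.shape)  # (700, 800, 6): five height slices plus one density channel
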
Example #8
def main():

    # Create Dataset
    dataset_config_path = mlod.root_dir() + \
        '/configs/mb_preprocessing/rpn_cars.config'
    dataset = DatasetBuilder.load_dataset_from_config(dataset_config_path)

    # Fixed sample
    sample_name = '000169'

    anchor_strides = dataset.kitti_utils.anchor_strides

    img_idx = int(sample_name)

    print("Showing mini batch for sample {}".format(sample_name))

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # KittiUtils class
    dataset_utils = dataset.kitti_utils

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)

    # Grab ground truth
    ground_truth_list = obj_utils.read_labels(dataset.label_dir, img_idx)
    ground_truth_list = dataset_utils.filter_labels(ground_truth_list)

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    ##############################
    # Flip sample info
    ##############################
    start_time = time.time()

    flipped_image = kitti_aug.flip_image(image)
    flipped_point_cloud = kitti_aug.flip_point_cloud(point_cloud)
    flipped_gt_list = [kitti_aug.flip_label_in_3d_only(obj)
                       for obj in ground_truth_list]
    flipped_ground_plane = kitti_aug.flip_ground_plane(ground_plane)
    flipped_calib_p2 = kitti_aug.flip_stereo_calib_p2(
        stereo_calib_p2, image_shape)

    flipped_points = flipped_point_cloud.T

    print('flip sample', time.time() - start_time)

    ##############################
    # Generate anchors
    ##############################
    clusters, _ = dataset.get_cluster_info()
    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Read mini batch info
    anchors_info = dataset_utils.get_anchors_info(
        dataset.classes_name,
        anchor_strides,
        sample_name)

    all_anchor_boxes_3d = []
    all_ious = []
    for class_idx in range(len(dataset.classes)):

        anchor_boxes_3d = anchor_generator.generate(
            area_3d=dataset.kitti_utils.area_extents,
            anchor_3d_sizes=clusters[class_idx],
            anchor_stride=anchor_strides[class_idx],
            ground_plane=ground_plane)

        if anchors_info:
            indices, ious, offsets, classes = anchors_info

            # Get non empty anchors from the indices
            non_empty_anchor_boxes_3d = anchor_boxes_3d[indices]

            all_anchor_boxes_3d.extend(non_empty_anchor_boxes_3d)
            all_ious.extend(ious)

    if len(all_anchor_boxes_3d) == 0:
        # Exit early if there are no non-empty anchors
        print("No anchors, please try a different sample")
        return

    # Convert to ndarrays
    all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
    all_ious = np.asarray(all_ious)

    ##############################
    # Flip anchors
    ##############################
    start_time = time.time()

    flipped_anchor_boxes_3d = kitti_aug.flip_boxes_3d(all_anchor_boxes_3d,
                                                      flip_ry=False)

    print('flip anchors', time.time() - start_time)

    # Overwrite with the flipped versions
    all_anchor_boxes_3d = flipped_anchor_boxes_3d
    points = flipped_points
    ground_truth_list = flipped_gt_list
    ground_plane = flipped_ground_plane
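
kitti_aug.flip_boxes_3d mirrors the boxes across the camera's x = 0 plane. A common convention for box_3d = [x, y, z, l, w, h, ry] is to negate x and reflect the yaw; the sketch below shows that convention as an assumption rather than a copy of the project's implementation (the demo above passes flip_ry=False, which here leaves ry untouched):

import numpy as np

def flip_boxes_3d_sketch(boxes_3d, flip_ry=True):
    """boxes_3d: N x 7 array [x, y, z, l, w, h, ry] in the camera frame."""
    flipped = np.copy(boxes_3d)
    flipped[:, 0] = -flipped[:, 0]                 # Mirror across x = 0
    if flip_ry:
        # Reflect the yaw and wrap it back into [-pi, pi]
        flipped[:, 6] = np.arctan2(np.sin(np.pi - flipped[:, 6]),
                                   np.cos(np.pi - flipped[:, 6]))
    return flipped
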
Example #9
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_boxes_2d = np.zeros((1, 4))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            lidar_only = False
            num_views = 1

            if not lidar_only:
                # Load image (BGR -> RGB)
                cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
                rgb_image = cv_bgr_image[..., ::-1]
                image_shape = rgb_image.shape[0:2]

                # Append the depth channel
                if self.add_depth:
                    depth_map = obj_utils.get_depth_map(
                        img_idx, self.depth_dir)

                    # Set invalid pixels to max depth
                    depth_map[np.asarray(depth_map == 0.0)] = \
                        self.kitti_utils.bev_extents[1, 1]

                    # Add channel dimension to make stacking easier
                    depth_map = np.expand_dims(depth_map, 2)
                    image_input = np.concatenate([rgb_image, depth_map],
                                                 axis=2)
                else:
                    image_input = rgb_image
            else:
                image_shape = (370, 1224)

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)
            #ground_plane = np.array([0,-1,0,1.68])

            if lidar_only:
                # Hard-coded projection matrices for the lidar-only views
                p_matrix = np.zeros((num_views, 3, 4), dtype=float)
                if num_views > 0:
                    p_matrix[0] = np.array(
                        [[8.39713500e+02, 3.58853400e+01,
                          4.48566750e+02, 2.31460650e+03],
                         [1.02835238e-13, 8.54979440e+02,
                          1.57320433e+02, 2.49655872e+03],
                         [0.00000000e+00, 7.97452000e-02,
                          9.96815000e-01, 5.14357000e+00]])
                if num_views > 1:
                    p_matrix[1] = np.array(
                        [[1.20171708e+03, 9.73326000e+01,
                          3.99933320e+02, 1.04945816e+04],
                         [1.41054657e+01, 8.65088160e+02,
                          8.46334690e+01, 5.24229862e+03],
                         [1.62221000e-01, 1.62221000e-01,
                          9.73329000e-01, 1.13555000e+01]])
            else:
                # Get calibration
                stereo_calib_p2 = calib_utils.read_calibration(
                    self.calib_dir, int(sample_name)).p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)
            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                if not lidar_only:
                    image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                obj_labels = [
                    kitti_aug.flip_label(obj, image_shape)
                    for obj in obj_labels
                ]

                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                if lidar_only:
                    for i in range(num_views):
                        p_matrix[i] = kitti_aug.flip_stereo_calib_p2(
                            p_matrix[i], image_shape)
                else:
                    stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                        stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if (kitti_aug.AUG_PCA_JITTER in sample.augs) and not lidar_only:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3], aug_img_noise=self.aug_img_noise)

            # Augmentation (Random Occlusion)
            if kitti_aug.AUG_RANDOM_OCC in sample.augs:
                point_cloud = kitti_aug.occ_aug(point_cloud, stereo_calib_p2,
                                                obj_labels)

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_boxes_2d = np.asarray([
                    box_3d_encoder.object_label_to_box_2d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Use empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                        label_boxes_2d = np.asarray([[-1.0, -1.0, -1.0, -1.0]])
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                        label_boxes_2d = np.zeros((1, 4))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)
            height_maps = bev_images.get('height_maps')

            # BEV random masking (disabled)
            """
            bev_drop_p = 0.5
            rand_01 = random.random()
            mask_bev_layer = np.zeros(height_maps[0].shape,dtype=np.float32)
            if rand_01 > bev_drop_p:
                mask_idx = random.randint(0,4)
                height_maps[mask_idx] = mask_bev_layer
            """

            #print(height_maps[0].shape)
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))
            #bev_input = np.transpose(np.array(height_maps),(1,2,0))

            point_cloud = self.kitti_utils._apply_slice_filter(
                point_cloud, ground_plane).T

            if lidar_only:
                depth_map = np.zeros(
                    (num_views, image_shape[0], image_shape[1]), dtype=float)
                for i in range(num_views):
                    depth_map[i, :, :] = project_depths(
                        point_cloud, p_matrix[i], image_shape[0:2])
                depth_map_expand_dims = np.expand_dims(depth_map, axis=-1)
                sample_dict = {
                    constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                    constants.KEY_LABEL_ANCHORS: label_anchors,
                    constants.KEY_LABEL_CLASSES: label_classes,
                    constants.KEY_IMAGE_INPUT: depth_map_expand_dims,
                    constants.KEY_BEV_INPUT: bev_input,
                    constants.KEY_ANCHORS_INFO: anchors_info,
                    constants.KEY_POINT_CLOUD: point_cloud,
                    constants.KEY_GROUND_PLANE: ground_plane,
                    constants.KEY_STEREO_CALIB_P2: p_matrix[0:num_views],
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_SAMPLE_AUGS: sample.augs,
                    constants.KEY_DPT_INPUT: depth_map
                }
            else:
                depth_map = project_depths(point_cloud, stereo_calib_p2,
                                           image_shape[0:2])
                depth_map = np.expand_dims(depth_map, axis=0)
                sample_dict = {
                    constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                    constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
                    constants.KEY_LABEL_ANCHORS: label_anchors,
                    constants.KEY_LABEL_CLASSES: label_classes,
                    constants.KEY_IMAGE_INPUT: image_input,
                    constants.KEY_BEV_INPUT: bev_input,
                    constants.KEY_ANCHORS_INFO: anchors_info,
                    constants.KEY_POINT_CLOUD: point_cloud,
                    constants.KEY_GROUND_PLANE: ground_plane,
                    constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_SAMPLE_AUGS: sample.augs,
                    constants.KEY_DPT_INPUT: depth_map
                }

            sample_dicts.append(sample_dict)

        return sample_dicts
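
project_depths above rasterizes the filtered point cloud into an image-sized depth map using the calibration matrix. A minimal sketch of that projection under assumed layouts (3 x N camera-frame points, (h, w) image shape); it keeps the nearest depth per pixel and is not the project's function:

import numpy as np

def project_depth_map(point_cloud, p_matrix, image_shape):
    """point_cloud: 3 x N camera-frame points, p_matrix: 3 x 4,
    image_shape: (h, w)."""
    h, w = image_shape
    depth_map = np.zeros((h, w), dtype=np.float32)

    # Homogeneous projection: [u*s, v*s, s] = P @ [x, y, z, 1]
    points_h = np.vstack([point_cloud, np.ones((1, point_cloud.shape[1]))])
    projected = p_matrix @ points_h
    depths = projected[2]

    # Keep points in front of the camera before dividing by depth
    in_front = depths > 1e-3
    u = np.round(projected[0, in_front] / depths[in_front]).astype(int)
    v = np.round(projected[1, in_front] / depths[in_front]).astype(int)
    d = depths[in_front]

    inside = (u >= 0) & (u < w) & (v >= 0) & (v < h)
    for ui, vi, di in zip(u[inside], v[inside], d[inside]):
        # Keep the closest point if several project to the same pixel
        if depth_map[vi, ui] == 0 or di < depth_map[vi, ui]:
            depth_map[vi, ui] = di
    return depth_map
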
Example #10
    def test_get_road_plane(self):
        plane = obj_utils.get_road_plane(0, self.test_data_planes_dir)

        np.testing.assert_allclose(plane, [-7.051729e-03, -9.997791e-01,
                                           -1.980151e-02, 1.680367e+00])
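
The test above reads sample 000000 from a planes directory. A planes file is a small text file whose last line holds the four coefficients; a minimal sketch of parsing one directly, where the header lines are an assumption and only the coefficient values are confirmed by the test:

import numpy as np

def read_plane_file(path):
    # Assumed layout of a planes file such as planes/000000.txt:
    #   # Plane
    #   Width 4
    #   Height 1
    #   -7.051729e-03 -9.997791e-01 -1.980151e-02 1.680367e+00
    with open(path) as f:
        lines = f.read().splitlines()
    plane = np.array([float(v) for v in lines[-1].split()])
    # Normalize so the first three coefficients form a unit normal
    return plane / np.linalg.norm(plane[:3])
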
Example #11
def main():
    """This demo shows RPN proposals and MLOD predictions in the
    3D point cloud.

    Keys:
        F1: Toggle proposals
        F2: Toggle predictions
        F3: Toggle 3D voxel grid
        F4: Toggle point cloud

        F5: Toggle easy ground truth objects (Green)
        F6: Toggle medium ground truth objects (Orange)
        F7: Toggle hard ground truth objects (Red)
        F8: Toggle all ground truth objects (default off)

        F9: Toggle ground slice filter (default off)
        F10: Toggle offset slice filter (default off)
    """

    ##############################
    # Options
    ##############################
    rpn_score_threshold = 0.1
    mlod_score_threshold = 0.1

    proposals_line_width = 1.0
    predictions_line_width = 3.0
    show_orientations = True

    point_cloud_source = 'lidar'

    # Config file folder, default (<mlod_root>/data/outputs/<checkpoint_name>)
    config_dir = None

    checkpoint_name = 'mlod_fpn_people_n_m'
    global_step = 135000  # Set to None to use the latest checkpoint

    #data_split = 'val_half'
    #data_split = 'val'
    data_split = 'test'

    # Show 3D iou text
    draw_ious_3d = False

    sample_name = '000031'

    # # # Cars # # #
    # sample_name = '000050'
    # sample_name = '000104'
    # sample_name = '000169'
    # sample_name = '000191'
    # sample_name = '000360'
    # sample_name = '001783'
    # sample_name = '001820'

    # val split
    # sample_name = '000181'
    # sample_name = '000751'
    # sample_name = '000843'
    # sample_name = '000944'
    # sample_name = '006338'

    # # # People # # #
    # val_half split
    # sample_name = '000001'  # Hard, 1 far cyc
    # sample_name = '000005'  # Easy, 1 ped
    # sample_name = '000122'  # Easy, 1 cyc
    # sample_name = '000134'  # Hard, lots of people
    # sample_name = '000167'  # Medium, 1 ped, 2 cycs
    # sample_name = '000187'  # Medium, 1 ped on left
    # sample_name = '000381'  # Easy, 1 ped
    # sample_name = '000398'  # Easy, 1 ped
    # sample_name = '000401'  # Hard, obscured peds
    # sample_name = '000407'  # Easy, 1 ped
    # sample_name = '000448'  # Hard, several far people
    # sample_name = '000486'  # Hard 2 obscured peds
    # sample_name = '000509'  # Easy, 1 ped
    # sample_name = '000718'  # Hard, lots of people
    # sample_name = '002216'  # Easy, 1 cyc

    # val split
    # sample_name = '000015'
    # sample_name = '000048'
    # sample_name = '000058'
    # sample_name = '000076'    # Medium, few ped, 1 cyc
    # sample_name = '000108'
    # sample_name = '000118'
    # sample_name = '000145'
    # sample_name = '000153'
    # sample_name = '000186'
    # sample_name = '000195'
    # sample_name = '000199'
    # sample_name = '000397'
    # sample_name = '004425'
    # sample_name = '004474'    # Hard, many ped, 1 cyc
    # sample_name = '004657'    # Hard, Few cycl, few ped
    # sample_name = '006071'
    # sample_name = '006828'    # Hard, Few cycl, few ped
    # sample_name = '006908'    # Hard, Few cycl, few ped
    # sample_name = '007412'
    # sample_name = '007318'    # Hard, Few cycl, few ped

    ##############################
    # End of Options
    ##############################

    if data_split == 'test':
        draw_ious_3d = False

    if config_dir is None:
        config_dir = mlod.root_dir() + '/data/outputs/' + checkpoint_name

    # Parse experiment config
    pipeline_config_file = \
        config_dir + '/' + checkpoint_name + '.config'
    _, _, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            pipeline_config_file, is_training=False)

    dataset_config.data_split = data_split

    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'
        dataset_config.has_labels = False

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        sample_name = dataset.sample_names[sample_idx]

    ##############################
    # Setup Paths
    ##############################
    img_idx = int(sample_name)

    # Text files directory
    proposals_and_scores_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions' +  \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions' +  \
        '/final_predictions_and_scores/' + dataset.data_split

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    # Output images directory
    img_out_dir = mlod.root_dir() + '/data/outputs/' + checkpoint_name + \
        '/predictions/images_3d/{}/{}/{}'.format(dataset.data_split,
                                                 global_step,
                                                 rpn_score_threshold)

    if not os.path.exists(img_out_dir):
        os.makedirs(img_out_dir)

    ##############################
    # Proposals
    ##############################
    # Load proposals from files
    proposals_and_scores = np.loadtxt(
        proposals_and_scores_dir +
        "/{}/{}.txt".format(global_step, sample_name))

    proposals = proposals_and_scores[:, 0:7]
    proposal_scores = proposals_and_scores[:, 7]

    rpn_score_mask = proposal_scores > rpn_score_threshold

    proposals = proposals[rpn_score_mask]
    proposal_scores = proposal_scores[rpn_score_mask]
    print('Proposals:', len(proposal_scores), proposal_scores)

    proposal_objs = \
        [box_3d_encoder.box_3d_to_object_label(proposal,
                                               obj_type='Proposal')
         for proposal in proposals]

    ##############################
    # Predictions
    ##############################
    # Load predictions from files
    predictions_and_scores = np.loadtxt(
        predictions_and_scores_dir +
        "/{}/{}.txt".format(global_step, sample_name)).reshape(-1, 9)

    prediction_boxes_3d = predictions_and_scores[:, 0:7]
    prediction_scores = predictions_and_scores[:, 7]
    prediction_types = np.asarray(predictions_and_scores[:, 8], dtype=np.int32)

    mlod_score_mask = prediction_scores >= mlod_score_threshold
    prediction_boxes_3d = prediction_boxes_3d[mlod_score_mask]
    prediction_scores = prediction_scores[mlod_score_mask]
    print('Predictions: ', len(prediction_scores), prediction_scores)

    final_predictions = np.copy(prediction_boxes_3d)

    # # Swap l, w for predictions where w > l
    # swapped_indices = predictions[:, 4] > predictions[:, 3]
    # final_predictions[swapped_indices, 3] = predictions[swapped_indices, 4]
    # final_predictions[swapped_indices, 4] = predictions[swapped_indices, 3]

    prediction_objs = []
    dataset.classes = ['Pedestrian', 'Cyclist', 'Car']
    for pred_idx in range(len(final_predictions)):
        prediction_box_3d = final_predictions[pred_idx]
        prediction_type = dataset.classes[prediction_types[pred_idx]]
        prediction_obj = box_3d_encoder.box_3d_to_object_label(
            prediction_box_3d, obj_type=prediction_type)
        prediction_objs.append(prediction_obj)

    ##############################
    # Ground Truth
    ##############################
    # Force labels off so no ground truth boxes are drawn
    dataset.has_labels = False
    if dataset.has_labels:
        # Get ground truth labels
        easy_gt_objs, medium_gt_objs, \
            hard_gt_objs, all_gt_objs = \
            demo_utils.get_gts_based_on_difficulty(dataset, img_idx)
    else:
        easy_gt_objs = medium_gt_objs = hard_gt_objs = all_gt_objs = []

    ##############################
    # 3D IoU
    ##############################
    if draw_ious_3d:
        # Convert to box_3d
        all_gt_boxes_3d = [
            box_3d_encoder.object_label_to_box_3d(gt_obj)
            for gt_obj in all_gt_objs
        ]
        pred_boxes_3d = [
            box_3d_encoder.object_label_to_box_3d(pred_obj)
            for pred_obj in prediction_objs
        ]
        max_ious_3d = demo_utils.get_max_ious_3d(all_gt_boxes_3d,
                                                 pred_boxes_3d)

    ##############################
    # Point Cloud
    ##############################
    image_path = dataset.get_rgb_image_path(sample_name)
    image = cv2.imread(image_path)

    point_cloud = dataset.kitti_utils.get_point_cloud(point_cloud_source,
                                                      img_idx,
                                                      image_shape=image.shape)
    point_cloud = np.asarray(point_cloud)

    # Filter point cloud to extents
    area_extents = np.asarray([[-40, 40], [-5, 3], [0, 70]])
    bev_extents = area_extents[[0, 2]]

    points = point_cloud.T
    point_filter = obj_utils.get_point_filter(point_cloud, area_extents)
    points = points[point_filter]

    point_colours = vis_utils.project_img_to_point_cloud(
        points, image, dataset.calib_dir, img_idx)

    # Voxelize the point cloud for visualization
    voxel_grid = VoxelGrid()
    voxel_grid.voxelize(points, voxel_size=0.1, create_leaf_layout=False)

    # Ground plane
    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    ##############################
    # Visualization
    ##############################
    # Create VtkVoxelGrid
    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(voxel_grid)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    # Create VtkAxes
    vtk_axes = vtk.vtkAxesActor()
    vtk_axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for proposal boxes
    vtk_proposal_boxes = VtkBoxes()
    vtk_proposal_boxes.set_line_width(proposals_line_width)
    vtk_proposal_boxes.set_objects(proposal_objs, COLOUR_SCHEME_PREDICTIONS)

    # Create VtkBoxes for prediction boxes
    vtk_prediction_boxes = VtkPyramidBoxes()
    vtk_prediction_boxes.set_line_width(predictions_line_width)
    vtk_prediction_boxes.set_objects(prediction_objs,
                                     COLOUR_SCHEME_PREDICTIONS,
                                     show_orientations)

    # Create VtkBoxes for ground truth
    vtk_hard_gt_boxes = VtkBoxes()
    vtk_medium_gt_boxes = VtkBoxes()
    vtk_easy_gt_boxes = VtkBoxes()
    vtk_all_gt_boxes = VtkBoxes()

    vtk_hard_gt_boxes.set_objects(hard_gt_objs, COLOUR_SCHEME_PREDICTIONS,
                                  show_orientations)
    vtk_medium_gt_boxes.set_objects(medium_gt_objs, COLOUR_SCHEME_PREDICTIONS,
                                    show_orientations)
    vtk_easy_gt_boxes.set_objects(easy_gt_objs, COLOUR_SCHEME_PREDICTIONS,
                                  show_orientations)
    vtk_all_gt_boxes.set_objects(all_gt_objs, VtkBoxes.COLOUR_SCHEME_KITTI,
                                 show_orientations)

    # Create VtkTextLabels for 3D ious
    vtk_text_labels = VtkTextLabels()

    if draw_ious_3d and len(all_gt_boxes_3d) > 0:
        gt_positions_3d = np.asarray(all_gt_boxes_3d)[:, 0:3]
        vtk_text_labels.set_text_labels(
            gt_positions_3d,
            ['{:0.3f}'.format(iou_3d) for iou_3d in max_ious_3d])

    # Create VtkGroundPlane
    vtk_ground_plane = VtkGroundPlane()
    vtk_slice_bot_plane = VtkGroundPlane()
    vtk_slice_top_plane = VtkGroundPlane()

    vtk_ground_plane.set_plane(ground_plane, bev_extents)
    vtk_slice_bot_plane.set_plane(ground_plane + [0, 0, 0, -0.2], bev_extents)
    vtk_slice_top_plane.set_plane(ground_plane + [0, 0, 0, -2.0], bev_extents)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)

    vtk_renderer.AddActor(vtk_proposal_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_prediction_boxes.vtk_actor)

    vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)

    vtk_renderer.AddActor(vtk_text_labels.vtk_actor)

    # Add ground plane and slice planes
    vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)
    vtk_renderer.AddActor(vtk_slice_bot_plane.vtk_actor)
    vtk_renderer.AddActor(vtk_slice_top_plane.vtk_actor)

    #vtk_renderer.AddActor(vtk_axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Set initial properties for some actors
    vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(3)
    vtk_proposal_boxes.vtk_actor.SetVisibility(0)
    vtk_voxel_grid.vtk_actor.SetVisibility(0)
    vtk_all_gt_boxes.vtk_actor.SetVisibility(0)

    vtk_ground_plane.vtk_actor.SetVisibility(0)
    vtk_slice_bot_plane.vtk_actor.SetVisibility(0)
    vtk_slice_top_plane.vtk_actor.SetVisibility(0)
    vtk_ground_plane.vtk_actor.GetProperty().SetOpacity(0.9)
    vtk_slice_bot_plane.vtk_actor.GetProperty().SetOpacity(0.9)
    vtk_slice_top_plane.vtk_actor.GetProperty().SetOpacity(0.9)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()
    # Zoom in slightly
    current_cam.Zoom(3.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName(
        "Predictions: Step {}, Sample {}, Min Score {}".format(
            global_step,
            sample_name,
            mlod_score_threshold,
        ))
    vtk_render_window.SetSize(900, 600)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    # Add custom interactor to toggle actor visibilities
    custom_interactor = vis_utils.CameraInfoInteractorStyle([
        vtk_proposal_boxes.vtk_actor,
        vtk_prediction_boxes.vtk_actor,
        vtk_voxel_grid.vtk_actor,
        vtk_point_cloud.vtk_actor,
        vtk_easy_gt_boxes.vtk_actor,
        vtk_medium_gt_boxes.vtk_actor,
        vtk_hard_gt_boxes.vtk_actor,
        vtk_all_gt_boxes.vtk_actor,
        vtk_ground_plane.vtk_actor,
        vtk_slice_bot_plane.vtk_actor,
        vtk_slice_top_plane.vtk_actor,
        vtk_text_labels.vtk_actor,
    ])

    vtk_render_window_interactor.SetInteractorStyle(custom_interactor)
    # Render in VTK
    vtk_render_window.Render()

    # Take a screenshot
    window_to_image_filter = vtk.vtkWindowToImageFilter()
    window_to_image_filter.SetInput(vtk_render_window)
    window_to_image_filter.Update()

    png_writer = vtk.vtkPNGWriter()
    file_name = img_out_dir + "/{}.png".format(sample_name)
    png_writer.SetFileName(file_name)
    png_writer.SetInputData(window_to_image_filter.GetOutput())
    png_writer.Write()

    print('Screenshot saved to ', file_name)

    vtk_render_window_interactor.Start()  # Blocking
    def preprocess(self, indices):
        """Preprocesses anchor info and saves info to files

        Args:
            indices (int array): sample indices to process.
                If None, processes all samples
        """
        # Get anchor parameters for this class
        anchor_params = self._anchor_params

        dataset = self._dataset
        dataset_utils = self._dataset.kitti_utils
        classes_name = dataset.classes_name

        anchor_strides = anchor_params['anchor_strides']
        # Make folder if it doesn't exist yet
        output_dir = self.mini_batch_utils.get_file_path(classes_name,
                                                         anchor_strides,
                                                         sample_name=None)
        os.makedirs(output_dir, exist_ok=True)

        # Get clusters for class
        #all_clusters_sizes, _ = dataset.get_cluster_info()

        anchor_generator = grid_anchor_bev_generator.GridAnchorBevGenerator()
        #anchor_type = self._dataset.kitti_utils.anchor_type

        # Load indices of data_split
        all_samples = dataset.sample_list

        if indices is None:
            indices = np.arange(len(all_samples))

        #indices = indices[:10]
        num_samples = len(indices)

        # For each image in the dataset, save info on the anchors
        for sample_idx in indices:
            # Get the sample name for the given index
            sample_name = all_samples[sample_idx].name
            img_idx = int(sample_name)

            # Check for existing files and skip to the next
            if self._check_for_existing(classes_name, anchor_strides,
                                        sample_name):
                print("{} / {}: Sample already preprocessed".format(
                    sample_idx + 1, num_samples, sample_name))
                #continue

            # Get ground truth and filter based on difficulty
            ground_truth_list = obj_utils.read_labels(dataset.label_dir,
                                                      img_idx)

            # Filter objects to dataset classes
            filtered_gt_list = dataset_utils.filter_labels(ground_truth_list)
            filtered_gt_list = np.asarray(filtered_gt_list)

            # If filtering by class leaves no valid ground truth, skip this image
            if len(filtered_gt_list) == 0:
                print("{} / {} No {}s for sample {} "
                      "(Ground Truth Filter)".format(sample_idx + 1,
                                                     num_samples, classes_name,
                                                     sample_name))

                # Output an empty file and move on to the next image.
                #comment out for DEBUG
                self._save_to_file(classes_name, anchor_strides, sample_name)
                continue

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(img_idx,
                                                    dataset.planes_dir)

            image = Image.open(dataset.get_rgb_image_path(sample_name))
            image_shape = [image.size[1], image.size[0]]

            # Generate BEV anchor boxes for each pyramid level
            all_level_anchor_boxes_bev = anchor_generator.generate(
                    image_shapes=anchor_params['image_shapes'],
                    anchor_base_sizes=anchor_params['anchor_base_sizes'],
                    anchor_strides=anchor_params['anchor_strides'],
                    anchor_ratios=anchor_params['anchor_ratios'],
                    anchor_scales=anchor_params['anchor_scales'],
                    anchor_init_ry_type=anchor_params['anchor_init_ry_type'])
            # Concatenate anchors from all levels
            # (for debugging, the commented line below uses only the last level)
            all_anchor_boxes_bev = np.concatenate(all_level_anchor_boxes_bev)
            #all_anchor_boxes_bev = all_level_anchor_boxes_bev[-1]

            # Filter empty anchors (those with fewer points than density_threshold).
            # Prepare anchors_3d, which do not need ry; if anchors are initialized
            # at -90 degrees, swap the two box extents so the BEV footprint stays
            # axis-aligned.
            anchors_bev = all_anchor_boxes_bev.copy()
            if anchor_params['anchor_init_ry_type'] == -90:
                anchors_bev[:, [2, 3]] = anchors_bev[:, [3, 2]]
            anchors_3d = box_bev_encoder.box_bev_to_anchor_3d(
                anchors_bev,
                bev_shape=self._bev_shape,
                bev_extents=self._dataset.kitti_utils.area_extents[[0, 2]])
            #print(anchors_3d)
            # Generate sliced 2D voxel grid for filtering
            vx_grid_2d = dataset_utils.create_sliced_voxel_grid_2d(
                sample_name,
                source=dataset.bev_source,
                image_shape=image_shape)
            empty_anchor_filter = anchor_filter.get_empty_anchor_filter_2d(
                anchors_3d, vx_grid_2d, self._density_threshold)
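            # get_empty_anchor_filter_2d presumably keeps only anchors whose BEV
            # footprint contains at least density_threshold occupied voxels in
            # the sliced voxel grid, so anchors over empty space are dropped.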
            print(f'Non-empty anchors: {np.sum(empty_anchor_filter)} / '
                  f'{len(all_anchor_boxes_bev)}, sample_name: {sample_name}')

            #empty_anchor_filter = np.ones(all_anchor_boxes_bev.shape[0], dtype=bool)

            # Calculate anchor info
            anchors_info = self._calculate_anchors_info(
                all_anchor_boxes_bev, empty_anchor_filter, filtered_gt_list)
            n_invalid = np.sum(np.isnan(anchors_info))
            if n_invalid > 0:
                raise ValueError(
                    'NaN values found in anchors_info for sample {}'.format(
                        sample_name))
            anchor_ious = anchors_info[:, self.mini_batch_utils.col_ious]

            valid_iou_indices = np.where(anchor_ious > 0.0)[0]

            print("{} / {}:"
                  "{:>6} anchors, "
                  "{:>6} iou > 0.0, "
                  "for {:>3} {}(s) for sample {}".format(
                      sample_idx + 1, num_samples, len(anchors_info),
                      len(valid_iou_indices), len(filtered_gt_list),
                      classes_name, sample_name))

            # Save anchors info
            #comment out for DEBUG
            self._save_to_file(classes_name, anchor_strides, sample_name,
                               anchors_info)
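# A minimal, self-contained sketch of the density-based filtering idea used above.
# It is an assumption that get_empty_anchor_filter_2d works along these lines
# (an integral image over a BEV occupancy grid); the grid, box format, and
# function name below are illustrative only, not MLOD's actual API.
import numpy as np


def occupancy_filter(occupancy, boxes, density_threshold=1):
    """Keep boxes whose footprint covers >= density_threshold occupied cells.

    occupancy: (H, W) boolean grid; boxes: (N, 4) int array of
    [row_min, col_min, row_max, col_max) half-open cell indices.
    """
    # Integral image padded with a leading zero row/column so the four-corner
    # lookup needs no special cases at the grid border
    integral = np.zeros((occupancy.shape[0] + 1, occupancy.shape[1] + 1),
                        dtype=np.int64)
    integral[1:, 1:] = np.cumsum(np.cumsum(occupancy, axis=0), axis=1)

    r0, c0, r1, c1 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    counts = (integral[r1, c1] - integral[r0, c1]
              - integral[r1, c0] + integral[r0, c0])
    return counts >= density_threshold


# Example: only the first box overlaps the occupied 2x2 patch
_occupancy = np.zeros((10, 10), dtype=bool)
_occupancy[4:6, 4:6] = True
_boxes = np.array([[3, 3, 7, 7], [0, 0, 2, 2]])
print(occupancy_filter(_occupancy, _boxes))  # [ True False]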
Example #13
0
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib = calib_utils.read_calibration(
                self.calib_dir, int(sample_name))
            stereo_calib_p2 = stereo_calib.p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

            # Augmentation (Flipping)
            # WZN: the flipping augmentation flips the image (camera frame), the
            # point cloud (LiDAR frame), and the calibration matrix (camera <->
            # LiDAR), so the cross-modal correspondence still holds.
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj) for obj in obj_labels
                ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)
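                # Dimensions of 1 are used for the dummy boxes above because the
                # offset targets are typically encoded with log(gt_dim / anchor_dim),
                # so zero-sized boxes would produce inf/NaN values.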

            # Create BEV maps

            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane, output_indices=self.output_indices)
            #WZN produce input for sparse pooling
            if self.output_indices:
                voxel_indices = bev_images[1]
                pts_in_voxel = bev_images[2]
                bev_images = bev_images[0]

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            #WZN produce input for sparse pooling
            if self.output_indices:
                sparse_pooling_input1 = produce_sparse_pooling_input(
                    gen_sparse_pooling_input_avod(
                        pts_in_voxel, voxel_indices, stereo_calib,
                        [image_shape[1], image_shape[0]],
                        bev_input.shape[0:2]),
                    stride=[1, 1])
                # WZN: AVOD pads the VGG input height by 4, so add it here
                bev_input_padded = np.copy(bev_input.shape[0:2])
                bev_input_padded[0] = bev_input_padded[0] + 4
                sparse_pooling_input2 = produce_sparse_pooling_input(
                    gen_sparse_pooling_input_avod(
                        pts_in_voxel, voxel_indices, stereo_calib,
                        [image_shape[1], image_shape[0]], bev_input_padded),
                    stride=[8, 8])
                sparse_pooling_input = [
                    sparse_pooling_input1, sparse_pooling_input2
                ]
            else:
                sparse_pooling_input = None

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                #WZN: for sparse pooling
                constants.KEY_SPARSE_POOLING_INPUT: sparse_pooling_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
Example #14
0
def main():
    """
    Visualization of anchor filtering using 3D integral images
    """

    anchor_colour_scheme = {
        "Car": (0, 255, 0),  # Green
        "Pedestrian": (255, 150, 50),  # Orange
        "Cyclist": (150, 50, 100),  # Purple
        "DontCare": (255, 0, 0),  # Red
        "Anchor": (0, 0, 255),  # Blue
    }

    # Create Dataset
    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    # Options
    clusters, _ = dataset.get_cluster_info()
    sample_name = "000000"
    img_idx = int(sample_name)
    anchor_stride = [0.5, 0.5]
    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator(
        anchor_3d_sizes=clusters, anchor_stride=anchor_stride)

    area_extents = np.array([[-40, 40], [-5, 3], [0, 70]])

    # Generate anchors in box_3d format
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(area_3d=area_extents,
                                                   ground_plane=ground_plane)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    point_cloud = obj_utils.get_lidar_point_cloud(img_idx, dataset.calib_dir,
                                                  dataset.velo_dir)

    offset_dist = 2.0

    # Filter points within certain xyz range and offset from ground plane
    offset_filter = obj_utils.get_point_filter(point_cloud, area_extents,
                                               ground_plane, offset_dist)

    # Filter points within 0.1 m of the road plane
    road_filter = obj_utils.get_point_filter(point_cloud, area_extents,
                                             ground_plane, 0.1)

    slice_filter = np.logical_xor(offset_filter, road_filter)
    point_cloud = point_cloud.T[slice_filter]
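    # The XOR keeps points that pass the 2.0 m offset filter but not the 0.1 m
    # road filter, i.e. roughly the slice of points between 0.1 m and 2.0 m
    # above the ground plane (and inside the area extents)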

    # Generate Voxel Grid
    vx_grid_3d = voxel_grid.VoxelGrid()
    vx_grid_3d.voxelize(point_cloud, 0.1, area_extents)

    # Anchors in anchor format
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
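    # box_3d is assumed to be [x, y, z, l, w, h, ry]; the anchor format drops ry
    # and keeps axis-aligned dimensions, which is what the integral-image based
    # emptiness check below operates on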

    # Filter the boxes here!
    start_time = time.time()
    empty_filter = \
        anchor_filter.get_empty_anchor_filter(anchors=all_anchors,
                                              voxel_grid_3d=vx_grid_3d,
                                              density_threshold=1)
    anchor_boxes_3d = anchor_boxes_3d[empty_filter]
    end_time = time.time()
    print("Anchors filtered in {} s".format(end_time - start_time))

    # Visualize GT boxes
    # Grab ground truth
    ground_truth_list = obj_utils.read_labels(dataset.label_dir, img_idx)

    # ----------
    # Test Sample extraction

    # Visualize from here
    vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(sample_name)
    image_shape = np.array(Image.open(image_path)).shape
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors, dataset,
                                                image_shape, img_idx)

    # Convert filtered anchors to ObjectLabels for VTK visualization
    anchor_objects = []
    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor_box_3d = anchor_boxes_3d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(
            anchor_box_3d, 'Anchor')
        # Append to a list for visualization in VTK later
        anchor_objects.append(obj_label)

    for idx in range(len(ground_truth_list)):
        ground_truth_obj = ground_truth_list[idx]
        # Append to a list for visualization in VTK later
        anchor_objects.append(ground_truth_obj)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for boxes
    vtk_boxes = VtkBoxes()
    vtk_boxes.set_objects(anchor_objects, anchor_colour_scheme)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(point_cloud)

    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(vx_grid_3d)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_boxes.vtk_actor)
    # vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vtk.vtkInteractorStyleTrackballCamera())

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()  # Blocking
Example #15
0
    def load_samples(self,
                     indices,
                     sin_type=None,
                     sin_level=None,
                     sin_input_name=None,
                     gen_all_sin_inputs=False,
                     list_mask_2d=None):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded
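            sin_type: single input noise type (e.g. 'lowres'), or None
            sin_level: noise level used by the SIN generators
            sin_input_name: name of the input to corrupt; must be one of
                SINFields.SIN_INPUT_NAMES (e.g. 'lidar')
            gen_all_sin_inputs: if True, apply the noise to all inputs
            list_mask_2d: optional list of 2D masks, one per sample index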

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for idx, sample_idx in enumerate(indices):
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            if list_mask_2d:
                mask_2d = list_mask_2d[idx]
            else:
                mask_2d = None

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

            # Read lidar with subsampling (handled before other preprocessing)
            if sin_type == 'lowres' and (sin_input_name == 'lidar'
                                         or gen_all_sin_inputs):
                stride_sub = get_stride_sub(sin_level)
                point_cloud = get_point_cloud_sub(img_idx, self.calib_dir,
                                                  self.velo_dir, image_shape,
                                                  stride_sub)
            else:
                point_cloud = self.kitti_utils.get_point_cloud(
                    self.bev_source, img_idx, image_shape)
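            # get_stride_sub / get_point_cloud_sub presumably subsample the lidar
            # scan by a stride derived from sin_level to emulate a lower
            # resolution sensor.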

            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj) for obj in obj_labels
                ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3])

            # Add Single Input Noise
            if (sin_input_name in SINFields.SIN_INPUT_NAMES) and (
                    sin_type in SINFields.VALID_SIN_TYPES):
                image_input, point_cloud = genSINtoInputs(
                    image_input,
                    point_cloud,
                    sin_type=sin_type,
                    sin_level=sin_level,
                    sin_input_name=sin_input_name,
                    mask_2d=mask_2d,
                    frame_calib_p2=stereo_calib_p2)
            # Add Input Noise to all
            if gen_all_sin_inputs:
                image_input, point_cloud = genSINtoAllInputs(
                    image_input,
                    point_cloud,
                    sin_type=sin_type,
                    sin_level=sin_level,
                    mask_2d=mask_2d,
                    frame_calib_p2=stereo_calib_p2)

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
Example #16
0
def main():
    """Flip RPN Mini Batch
     Visualization of the mini batch anchors for RpnModel training.

     Keys:
         F1: Toggle mini batch anchors
         F2: Flipped
     """

    anchor_colour_scheme = {
        "Car": (255, 0, 0),  # Red
        "Pedestrian": (255, 150, 50),  # Orange
        "Cyclist": (150, 50, 100),  # Purple
        "DontCare": (255, 255, 255),  # White
        "Anchor": (150, 150, 150),  # Gray
        "Regressed Anchor": (255, 255, 0),  # Yellow
        "Positive": (0, 255, 255),  # Teal
        "Negative": (255, 0, 255)  # Purple
    }

    dataset_config_path = mlod.root_dir() + \
        '/configs/mb_rpn_demo_cars.config'

    # dataset_config_path = mlod.root_dir() + \
    #     '/configs/mb_rpn_demo_people.config'

    ##############################
    # Options
    ##############################
    # # # Random sample # # #
    sample_name = None

    # # # Cars # # #
    # sample_name = "000001"
    # sample_name = "000050"
    # sample_name = "000104"
    # sample_name = "000112"
    # sample_name = "000169"
    # sample_name = "000191"

    sample_name = "003801"

    # # # Pedestrians # # #
    # sample_name = "000000"
    # sample_name = "000011"
    # sample_name = "000015"
    # sample_name = "000028"
    # sample_name = "000035"
    # sample_name = "000134"
    # sample_name = "000167"
    # sample_name = '000379'
    # sample_name = '000381'
    # sample_name = '000397'
    # sample_name = '000398'
    # sample_name = '000401'
    # sample_name = '000407'
    # sample_name = '000486'
    # sample_name = '000509'

    # # Cyclists # # #
    # sample_name = '000122'
    # sample_name = '000448'

    # # # Multiple classes # # #
    # sample_name = "000764"
    ##############################
    # End of Options
    ##############################

    # Create Dataset
    dataset = DatasetBuilder.load_dataset_from_config(dataset_config_path)

    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        sample_name = dataset.sample_list[sample_idx].name

    anchor_strides = dataset.kitti_utils.anchor_strides

    img_idx = int(sample_name)

    print("Showing mini batch for sample {}".format(sample_name))

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # KittiUtils class
    dataset_utils = dataset.kitti_utils

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)
    points = point_cloud.T
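    # point_cloud is 3 x N (rows x, y, z); transpose to N x 3 for visualization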

    # Grab ground truth
    ground_truth_list = obj_utils.read_labels(dataset.label_dir, img_idx)
    ground_truth_list = dataset_utils.filter_labels(ground_truth_list)

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    ##############################
    # Flip sample info
    ##############################
    start_time = time.time()

    flipped_image = kitti_aug.flip_image(image)
    flipped_point_cloud = kitti_aug.flip_point_cloud(point_cloud)
    flipped_gt_list = [
        kitti_aug.flip_label_in_3d_only(obj) for obj in ground_truth_list
    ]
    flipped_ground_plane = kitti_aug.flip_ground_plane(ground_plane)
    flipped_calib_p2 = kitti_aug.flip_stereo_calib_p2(stereo_calib_p2,
                                                      image_shape)

    print('flip sample', time.time() - start_time)

    flipped_points = flipped_point_cloud.T
    point_colours = vis_utils.project_img_to_point_cloud(
        points, image, dataset.calib_dir, img_idx)

    ##############################
    # Generate anchors
    ##############################
    clusters, _ = dataset.get_cluster_info()
    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Read mini batch info
    anchors_info = dataset_utils.get_anchors_info(sample_name)

    all_anchor_boxes_3d = []
    all_ious = []
    all_offsets = []
    for class_idx in range(len(dataset.classes)):

        anchor_boxes_3d = anchor_generator.generate(
            area_3d=dataset.kitti_utils.area_extents,
            anchor_3d_sizes=clusters[class_idx],
            anchor_stride=anchor_strides[class_idx],
            ground_plane=ground_plane)

        if len(anchors_info[class_idx]) > 0:
            indices, ious, offsets, classes = anchors_info[class_idx]

            # Get non empty anchors from the indices
            non_empty_anchor_boxes_3d = anchor_boxes_3d[indices]

            all_anchor_boxes_3d.extend(non_empty_anchor_boxes_3d)
            all_ious.extend(ious)
            all_offsets.extend(offsets)

    if len(all_anchor_boxes_3d) == 0:
        # Exit early if anchors_info is empty
        print("No anchors, please try a different sample")
        return

    # Convert to ndarrays
    all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
    all_ious = np.asarray(all_ious)
    all_offsets = np.asarray(all_offsets)

    ##############################
    # Flip anchors
    ##############################
    start_time = time.time()

    # Flip anchors and offsets
    flipped_anchor_boxes_3d = kitti_aug.flip_boxes_3d(all_anchor_boxes_3d,
                                                      flip_ry=False)
    all_offsets[:, 0] = -all_offsets[:, 0]

    print('flip anchors and offsets', time.time() - start_time)

    # Overwrite with flipped things
    all_anchor_boxes_3d = flipped_anchor_boxes_3d
    points = flipped_points
    ground_truth_list = flipped_gt_list
    ground_plane = flipped_ground_plane

    ##############################
    # Mini batch sampling
    ##############################
    # Sample an RPN mini batch from the non empty anchors
    mini_batch_utils = dataset.kitti_utils.mini_batch_utils
    mb_mask_tf, _ = mini_batch_utils.sample_rpn_mini_batch(all_ious)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    mb_mask = sess.run(mb_mask_tf)
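    # The mini batch sampler is a TensorFlow op, so it is evaluated in a session;
    # mb_mask is a boolean mask over the non-empty anchors that selects the
    # sampled positives and negatives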

    mb_anchor_boxes_3d = all_anchor_boxes_3d[mb_mask]
    mb_anchor_ious = all_ious[mb_mask]
    mb_anchor_offsets = all_offsets[mb_mask]

    # ObjectLabel list that holds all boxes to visualize
    obj_list = []

    # Convert the mini_batch anchors to object list
    for i in range(len(mb_anchor_boxes_3d)):
        if mb_anchor_ious[i] > mini_batch_utils.rpn_pos_iou_range[0]:
            obj_type = "Positive"
        else:
            obj_type = "Negative"

        obj = box_3d_encoder.box_3d_to_object_label(mb_anchor_boxes_3d[i],
                                                    obj_type)
        obj_list.append(obj)

    # Convert all non-empty anchors to object list
    non_empty_anchor_objs = \
        [box_3d_encoder.box_3d_to_object_label(
            anchor_box_3d, obj_type='Anchor')
         for anchor_box_3d in all_anchor_boxes_3d]

    ##############################
    # Regress Positive Anchors
    ##############################
    # Convert anchor_boxes_3d to anchors and apply offsets
    mb_pos_mask = mb_anchor_ious > mini_batch_utils.rpn_pos_iou_range[0]
    mb_pos_anchor_boxes_3d = mb_anchor_boxes_3d[mb_pos_mask]
    mb_pos_anchor_offsets = mb_anchor_offsets[mb_pos_mask]

    mb_pos_anchors = box_3d_encoder.box_3d_to_anchor(mb_pos_anchor_boxes_3d)
    regressed_pos_anchors = anchor_encoder.offset_to_anchor(
        mb_pos_anchors, mb_pos_anchor_offsets)

    # Convert regressed anchors to ObjectLabels for visualization
    regressed_anchor_boxes_3d = box_3d_encoder.anchors_to_box_3d(
        regressed_pos_anchors, fix_lw=True)
    regressed_anchor_objs = \
        [box_3d_encoder.box_3d_to_object_label(
            box_3d, obj_type='Regressed Anchor')
         for box_3d in regressed_anchor_boxes_3d]

    ##############################
    # Visualization
    ##############################
    cv2.imshow('{} flipped'.format(sample_name), flipped_image)
    cv2.waitKey()

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for mini batch anchors
    vtk_pos_anchor_boxes = VtkBoxes()
    vtk_pos_anchor_boxes.set_objects(obj_list, anchor_colour_scheme)

    # VtkBoxes for non empty anchors
    vtk_non_empty_anchors = VtkBoxes()
    vtk_non_empty_anchors.set_objects(non_empty_anchor_objs,
                                      anchor_colour_scheme)
    vtk_non_empty_anchors.set_line_width(0.1)

    # VtkBoxes for regressed anchors
    vtk_regressed_anchors = VtkBoxes()
    vtk_regressed_anchors.set_objects(regressed_anchor_objs,
                                      anchor_colour_scheme)
    vtk_regressed_anchors.set_line_width(5.0)

    # Create VtkBoxes for ground truth
    vtk_gt_boxes = VtkBoxes()
    vtk_gt_boxes.set_objects(ground_truth_list,
                             anchor_colour_scheme,
                             show_orientations=True)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    vtk_ground_plane = VtkGroundPlane()
    vtk_ground_plane.set_plane(ground_plane, dataset.kitti_utils.bev_extents)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()

    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_non_empty_anchors.vtk_actor)
    vtk_renderer.AddActor(vtk_pos_anchor_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_regressed_anchors.vtk_actor)
    vtk_renderer.AddActor(vtk_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)

    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("RPN Mini Batch")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_non_empty_anchors.vtk_actor,
            vtk_pos_anchor_boxes.vtk_actor,
            vtk_regressed_anchors.vtk_actor,
            vtk_ground_plane.vtk_actor,
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
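# A minimal sketch of the offset decoding used by anchor_encoder.offset_to_anchor
# above; the exact MLOD encoding is an assumption here, but the common form
# regresses centres relative to anchor size and dimensions in log space.
import numpy as np


def decode_offsets(anchors, offsets):
    """anchors/offsets: (N, 6) arrays in [x, y, z, dim_x, dim_y, dim_z] form."""
    centres = anchors[:, :3] + offsets[:, :3] * anchors[:, 3:]
    dims = anchors[:, 3:] * np.exp(offsets[:, 3:])
    return np.hstack([centres, dims])


# Example: a positive anchor nudged slightly forward and enlarged by ~5 %
print(decode_offsets(np.array([[10.0, 1.0, 20.0, 3.9, 1.6, 1.5]]),
                     np.array([[0.1, 0.0, 0.0, 0.05, 0.05, 0.05]])))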
Example #17
0
def main():
    """Shows a flipped sample in 3D
    """

    # Create Dataset
    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    ##############################
    # Options
    ##############################
    # sample_name = "000191"
    sample_name = "000104"
    img_idx = int(sample_name)
    print("Showing anchors for sample {}".format(sample_name))

    ##############################
    # Load Sample Data
    ##############################
    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # Get point cloud
    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)

    points = np.array(point_cloud).T

    # Ground truth
    gt_labels = obj_utils.read_labels(dataset.label_dir, img_idx)

    # Filter ground truth
    gt_labels = dataset.kitti_utils.filter_labels(gt_labels)

    ##############################
    # Flip stuff
    ##############################
    image_flipped = np.fliplr(image)

    # Flip ground plane coeff (x)
    ground_plane_flipped = np.copy(ground_plane)
    ground_plane_flipped[0] = -ground_plane_flipped[0]
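    # Mirroring the scene across x = 0 maps (x, y, z) to (-x, y, z); a point on
    # a*x + b*y + c*z + d = 0 then satisfies (-a)*x' + b*y + c*z + d = 0 with
    # x' = -x, so only the x coefficient needs to be negated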

    # Flip 3D points
    points_flipped = kitti_aug.flip_points(points)

    # Get point cloud colours
    point_colours_flipped = project_flipped_img_to_point_cloud(
        points_flipped, image_flipped, dataset.calib_dir, img_idx)

    # Flip ground truth boxes
    gt_labels_flipped = [
        kitti_aug.flip_label_in_3d_only(obj) for obj in gt_labels
    ]

    ##############################
    # VTK Visualization
    ##############################
    # Axes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Point cloud
    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points_flipped,
                               point_colours=point_colours_flipped)

    # Ground Truth Boxes
    vtk_boxes = VtkBoxes()
    vtk_boxes.set_objects(gt_labels_flipped,
                          VtkBoxes.COLOUR_SCHEME_KITTI,
                          show_orientations=True)

    # Renderer
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Add Actors to Renderer
    vtk_renderer.AddActor(axes)
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_boxes.vtk_actor)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_point_cloud.vtk_actor,
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
Example #18
0
def main():
    """
    Visualization for comparison of anchor filtering with
        2D vs 3D integral images

    Keys:
        F1: Toggle 3D integral image filtered anchors
        F2: Toggle 2D integral image filtered anchors
        F3: Toggle 2D integral image empty anchors
    """

    anchor_2d_colour_scheme = {"Anchor": (0, 0, 255)}  # Blue
    anchor_3d_colour_scheme = {"Anchor": (0, 255, 0)}  # Green
    anchor_unfiltered_colour_scheme = {"Anchor": (255, 0, 255)}  # Purple

    # Create Dataset
    dataset = DatasetBuilder.build_kitti_dataset(
        DatasetBuilder.KITTI_TRAINVAL)

    sample_name = "000001"
    img_idx = int(sample_name)
    print("Showing anchors for sample {}".format(sample_name))

    # Options
    # These clusters are from the trainval set and give more 2D anchors than 3D
    clusters = np.array([[3.55, 1.835, 1.525], [4.173, 1.69, 1.49]])
    anchor_stride = [3.0, 3.0]

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)
    area_extents = np.array([[-40, 40], [-5, 3], [0, 70]])

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Generate anchors
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(area_3d=area_extents,
                                                   anchor_3d_sizes=clusters,
                                                   anchor_stride=anchor_stride,
                                                   ground_plane=ground_plane)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Get point cloud
    point_cloud = obj_utils.get_stereo_point_cloud(img_idx, dataset.calib_dir,
                                                   dataset.disp_dir)

    ground_offset_dist = 0.2
    offset_dist = 2.0

    # Keep points inside the area extents that lie between ground_offset_dist
    # (0.2 m) and offset_dist (2.0 m) above the ground plane
    slice_filter = dataset.kitti_utils.create_slice_filter(point_cloud,
                                                           area_extents,
                                                           ground_plane,
                                                           ground_offset_dist,
                                                           offset_dist)
    points = np.array(point_cloud).T
    points = points[slice_filter]

    anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)

    # Create 2D voxel grid
    vx_grid_2d = voxel_grid_2d.VoxelGrid2D()
    vx_grid_2d.voxelize_2d(points, 0.1, area_extents)

    # Create 3D voxel grid
    vx_grid_3d = voxel_grid.VoxelGrid()
    vx_grid_3d.voxelize(points, 0.1, area_extents)

    # Filter the boxes here!
    start_time = time.time()
    empty_filter_2d = anchor_filter.get_empty_anchor_filter_2d(
        anchors=anchors,
        voxel_grid_2d=vx_grid_2d,
        density_threshold=1)
    anchors_2d = anchor_boxes_3d[empty_filter_2d]
    end_time = time.time()
    print("2D Anchors filtered in {} s".format(end_time - start_time))
    print("Number of 2D anchors remaining: %d" % (anchors_2d.shape[0]))

    unfiltered_anchors_2d = anchor_boxes_3d[np.logical_not(empty_filter_2d)]

    # 3D filtering
    start_time = time.time()
    empty_filter_3d = anchor_filter.get_empty_anchor_filter(
        anchors=anchors,
        voxel_grid_3d=vx_grid_3d,
        density_threshold=1)
    anchor_boxes_3d = anchor_boxes_3d[empty_filter_3d]
    end_time = time.time()
    print("3D Anchors filtered in {} s".format(end_time - start_time))
    print("Number of 3D anchors remaining: %d" % (anchor_boxes_3d.shape[0]))

    anchor_2d_objects = []
    for anchor_idx in range(len(anchors_2d)):
        anchor = anchors_2d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor, 'Anchor')

        # Append to a list for visualization in VTK later
        anchor_2d_objects.append(obj_label)

    anchor_3d_objects = []
    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor = anchor_boxes_3d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor, 'Anchor')

        # Append to a list for visualization in VTK later
        anchor_3d_objects.append(obj_label)

    unfiltered_anchor_objects = []
    for anchor_idx in range(len(unfiltered_anchors_2d)):
        anchor = unfiltered_anchors_2d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(anchor, 'Anchor')

        # Append to a list for visualization in VTK later
        unfiltered_anchor_objects.append(obj_label)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for boxes
    vtk_2d_boxes = VtkBoxes()
    vtk_2d_boxes.set_objects(anchor_2d_objects, anchor_2d_colour_scheme)

    vtk_3d_boxes = VtkBoxes()
    vtk_3d_boxes.set_objects(anchor_3d_objects, anchor_3d_colour_scheme)

    vtk_unfiltered_boxes = VtkBoxes()
    vtk_unfiltered_boxes.set_objects(unfiltered_anchor_objects,
                                     anchor_unfiltered_colour_scheme)

    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(vx_grid_3d)

    vtk_voxel_grid_2d = VtkVoxelGrid()
    vtk_voxel_grid_2d.set_voxels(vx_grid_2d)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_2d_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_3d_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_unfiltered_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid_2d.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_2d_boxes.vtk_actor,
            vtk_3d_boxes.vtk_actor,
            vtk_unfiltered_boxes.vtk_actor,
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
Example #19
0
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)
                img_roi_all = self.get_img_roi_data(sample_name)
                #img_roi      = all_img_rois[0]
                #img_roi_norm = all_img_rois[1]

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info,
                        # constants.KEY_IMG_ROI: img_roi,
                        constants.KEY_IMG_ROI_ALL: img_roi_all
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB); cv2.imread decodes images with channels
            # stored in B, G, R order
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj) for obj in obj_labels
                ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

                # Flip the image ROIs (pixel-space and normalized)
                img_roi_all = (kitti_aug.flip_roi(img_roi_all[0], image_shape),
                               kitti_aug.flip_roi_norm(img_roi_all[1]))
                if anchors_info:
                    # Flip the x offsets to match the flipped sample
                    anchor_indices, anchors_ious, anchor_offsets, \
                        anchor_classes = anchors_info
                    anchor_offsets[:, 0] = -anchor_offsets[:, 0]
                    anchors_info = (anchor_indices, anchors_ious,
                                    anchor_offsets, anchor_classes)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_boxes_2d = np.asarray([
                    box_2d_encoder.object_label_to_box_2d(obj_label)
                    for obj_label in obj_labels
                ])

                ## Flip the 2D label boxes to match the flipped image
                if kitti_aug.AUG_FLIPPING in sample.augs:
                    label_boxes_2d = kitti_aug.flip_roi(
                        label_boxes_2d, image_shape)

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    img_roi_all = []
                    #img_roi = []
                    #img_roi_norm= []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
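                        # Illustrative sketch (an assumption, not code from
                        # this repository): if the offsets are encoded as
                        #     t_xyz = (gt_xyz - anchor_xyz) / anchor_dims
                        #     t_dims = np.log(gt_dims / anchor_dims)
                        # then a zero-sized anchor produces divisions by zero
                        # and log(0), while any non-zero size such as
                        # [1, 1, 1] keeps the encoding finite.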
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)
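                    # label_anchors columns (assumed AVOD-style layout):
                    # [x, y, z, dim_x, dim_y, dim_z], matching the (1, 6)
                    # placeholder used above.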

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))
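            # bev_input stacks the height maps and the density map along the
            # channel axis, giving shape (bev_h, bev_w, num_height_maps + 1).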

            sample_dict = {
                # constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_IMG_ROI_ALL: img_roi_all,
                # constants.KEY_IMG_ROI: img_roi,
                # constants.KEY_IMG_ROI_NORM: img_roi_norm,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
Example #20
0
def main():
    """This demo visualizes box 8C format predicted by MLOD, before
    getting converted to Box 3D.

    Keys:
        F1: Toggle predictions
        F2: Toggle easy ground truth objects (Green)
        F3: Toggle medium ground truth objects (Orange)
        F4: Toggle hard ground truth objects (Red)
        F5: Toggle all ground truth objects (default off)

        F6: Toggle 3D voxel grid
        F7: Toggle point cloud
    """
    ##############################
    # Options
    ##############################
    mlod_score_threshold = 0.1
    show_orientations = True

    checkpoint_name = 'mlod_exp_example'

    global_step = None  # None selects the latest available checkpoint
    global_step = 100000

    sample_name = None

    # # # Cars # # #
    sample_name = '000050'
    # sample_name = '000104'
    # sample_name = '000169'
    # sample_name = '000175'
    # sample_name = '000191'
    # sample_name = '000335'
    # sample_name = '000360'
    # sample_name = '001783'
    # sample_name = '001820'
    # sample_name = '006338'

    # # # People # # #
    # val_half split
    # sample_name = '000001'  # Hard, 1 far cyc
    # sample_name = '000005'  # Easy, 1 ped
    # sample_name = '000122'  # Easy, 1 cyc
    # sample_name = '000134'  # Hard, lots of people
    # sample_name = '000167'  # Medium, 1 ped, 2 cycs
    # sample_name = '000187'  # Medium, 1 ped on left
    # sample_name = '000381'  # Easy, 1 ped
    # sample_name = '000398'  # Easy, 1 ped
    # sample_name = '000401'  # Hard, obscured peds
    # sample_name = '000407'  # Easy, 1 ped
    # sample_name = '000448'  # Hard, several far people
    # sample_name = '000486'  # Hard 2 obscured peds
    # sample_name = '000509'  # Easy, 1 ped
    # sample_name = '000718'  # Hard, lots of people
    # sample_name = '002216'  # Easy, 1 cyc

    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL_HALF)

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)
    ##############################
    # Setup Paths
    ##############################

    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        sample_name = dataset.sample_list[sample_idx]

    img_idx = int(sample_name)

    # Text files directory
    prediction_boxes_3d_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions' +  \
        '/final_predictions_and_scores/' + dataset.data_split

    prediction_boxes_4c_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions' +  \
        '/final_boxes_4c_and_scores/' + dataset.data_split

    # Get checkpoint step
    steps = os.listdir(prediction_boxes_3d_dir)
    steps.sort(key=int)
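    # (Assumption: sub-directories under the predictions folder are named by
    # integer global step, hence the numeric sort.)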
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    ##############################
    # Load Predictions
    ##############################
    # Load prediction boxes_3d from files
    prediction_boxes_3d_and_scores = np.loadtxt(
        prediction_boxes_3d_dir +
        "/{}/{}.txt".format(global_step, sample_name))

    pred_boxes_3d = prediction_boxes_3d_and_scores[:, 0:7]
    # pred_boxes_3d_scores = prediction_boxes_3d_and_scores[:, 8]
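    # (Assumed box_3d layout: [x, y, z, l, w, h, ry], followed by the
    # detection score column(s).)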

    # Load prediction boxes_4c
    prediction_boxes_4c_and_scores = np.loadtxt(
        prediction_boxes_4c_dir +
        "/{}/{}.txt".format(global_step, sample_name))
    pred_boxes_4c = prediction_boxes_4c_and_scores[:, 0:10]
    pred_boxes_4c_scores = prediction_boxes_4c_and_scores[:, 10]
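    # (Assumed box_4c layout: four ground-plane corners followed by two
    # heights, i.e. [x1..x4, z1..z4, h1, h2], with the score last.)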

    # Filter by score
    score_mask = pred_boxes_4c_scores >= mlod_score_threshold
    pred_boxes_3d = pred_boxes_3d[score_mask]
    pred_boxes_4c = pred_boxes_4c[score_mask]

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)
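    # ground_plane holds the plane coefficients [a, b, c, d] of
    # a*x + b*y + c*z + d = 0, read from the KITTI planes file.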

    # Visualization
    all_vtk_boxes_4c = []
    for box_4c in pred_boxes_4c:
        vtk_box_4c = VtkBox4c()
        vtk_box_4c.set_box_4c(box_4c, ground_plane)
        all_vtk_boxes_4c.append(vtk_box_4c)

    # Convert boxes_3d to ObjectLabels
    pred_objs = np.asarray([
        box_3d_encoder.box_3d_to_object_label(box_3d, obj_type='Car')
        for box_3d in pred_boxes_3d
    ])
    vtk_boxes_3d = VtkBoxes()
    vtk_boxes_3d.set_objects(pred_objs,
                             VtkBoxes.COLOUR_SCHEME_KITTI,
                             show_orientations=show_orientations)

    ##############################
    # Ground Truth
    ##############################
    if dataset.has_labels:
        easy_gt_objs, medium_gt_objs, \
            hard_gt_objs, all_gt_objs = \
            demo_utils.get_gts_based_on_difficulty(dataset,
                                                   img_idx)
    else:
        easy_gt_objs = medium_gt_objs = hard_gt_objs = all_gt_objs = []

    ##############################
    # Point Cloud
    ##############################
    image_path = dataset.get_rgb_image_path(sample_name)
    image = cv2.imread(image_path)
    img_idx = int(sample_name)

    points, point_colours = demo_utils.get_filtered_pc_and_colours(
        dataset, image, img_idx)

    # # Voxelize the point cloud for visualization
    # voxel_grid = VoxelGrid()
    # voxel_grid.voxelize(points, voxel_size=0.1,
    #                     create_leaf_layout=False)

    ##############################
    # Visualization
    ##############################
    # # Create VtkVoxelGrid
    # vtk_voxel_grid = VtkVoxelGrid()
    # vtk_voxel_grid.set_voxels(voxel_grid)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for ground truth
    vtk_easy_gt_boxes, vtk_medium_gt_boxes, \
        vtk_hard_gt_boxes, vtk_all_gt_boxes = \
        demo_utils.create_gt_vtk_boxes(easy_gt_objs,
                                       medium_gt_objs,
                                       hard_gt_objs,
                                       all_gt_objs,
                                       show_orientations)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)

    vtk_box_actors = vtk.vtkAssembly()

    # Prediction boxes
    for vtk_box_4c in all_vtk_boxes_4c:
        # Adding text labels slows down rendering
        vtk_renderer.AddActor(vtk_box_4c.vtk_text_labels.vtk_actor)
        vtk_box_actors.AddPart(vtk_box_4c.vtk_actor)

    vtk_renderer.AddActor(vtk_boxes_3d.vtk_actor)

    vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)

    vtk_renderer.AddActor(vtk_box_actors)

    vtk_renderer.AddActor(axes)

    # Set initial properties for some actors
    vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(2)

    vtk_all_gt_boxes.vtk_actor.SetVisibility(0)
    vtk_boxes_3d.vtk_actor.SetVisibility(0)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName(
        "Predictions: Step {}, Sample {}, Min Score {}".format(
            global_step,
            sample_name,
            mlod_score_threshold,
        ))

    vtk_render_window.SetSize(900, 600)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_box_actors,
            vtk_boxes_3d.vtk_actor,
            vtk_easy_gt_boxes.vtk_actor,
            vtk_medium_gt_boxes.vtk_actor,
            vtk_hard_gt_boxes.vtk_actor,
            vtk_all_gt_boxes.vtk_actor,
            vtk_point_cloud.vtk_actor,
        ]))

    vtk_render_window_interactor.Start()