Code Example #1
# Imports assumed from the MLOD/wavedata project layout; exact module
# paths may differ in your checkout.
import time

import numpy as np
import vtk

from mlod.builders.dataset_builder import DatasetBuilder
from mlod.core import anchor_filter, box_3d_encoder
from mlod.core.anchor_generators import grid_anchor_3d_generator
from wavedata.tools.core import voxel_grid, voxel_grid_2d
from wavedata.tools.obj_detection import obj_utils
from wavedata.tools.visualization import vis_utils
from wavedata.tools.visualization.vtk_boxes import VtkBoxes
from wavedata.tools.visualization.vtk_voxel_grid import VtkVoxelGrid


def main():
    """
    Visualization for comparison of anchor filtering with
        2D vs 3D integral images

    Keys:
        F1: Toggle 3D integral image filtered anchors
        F2: Toggle 2D integral image filtered anchors
        F3: Toggle 2D integral image empty anchors
    """

    anchor_2d_colour_scheme = {"Anchor": (0, 0, 255)}  # Blue
    anchor_3d_colour_scheme = {"Anchor": (0, 255, 0)}  # Green
    anchor_unfiltered_colour_scheme = {"Anchor": (255, 0, 255)}  # Magenta

    # Create Dataset
    dataset = DatasetBuilder.build_kitti_dataset(
        DatasetBuilder.KITTI_TRAINVAL)

    sample_name = "000001"
    img_idx = int(sample_name)
    print("Showing anchors for sample {}".format(sample_name))

    # Options
    # These clusters are from the trainval set and give more 2D anchors than 3D
    clusters = np.array([[3.55, 1.835, 1.525], [4.173, 1.69, 1.49]])
    anchor_stride = [3.0, 3.0]

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)
    area_extents = np.array([[-40, 40], [-5, 3], [0, 70]])

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Generate anchors
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(area_3d=area_extents,
                                                   anchor_3d_sizes=clusters,
                                                   anchor_stride=anchor_stride,
                                                   ground_plane=ground_plane)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Get point cloud
    point_cloud = obj_utils.get_stereo_point_cloud(img_idx, dataset.calib_dir,
                                                   dataset.disp_dir)

    ground_offset_dist = 0.2
    offset_dist = 2.0

    # Keep points inside the area extents that lie between
    # ground_offset_dist (0.2 m) and offset_dist (2.0 m) above the ground plane
    slice_filter = dataset.kitti_utils.create_slice_filter(point_cloud,
                                                           area_extents,
                                                           ground_plane,
                                                           ground_offset_dist,
                                                           offset_dist)
    points = np.array(point_cloud).T
    points = points[slice_filter]

    anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)

    # Create 2D voxel grid
    vx_grid_2d = voxel_grid_2d.VoxelGrid2D()
    vx_grid_2d.voxelize_2d(points, 0.1, area_extents)

    # Create 3D voxel grid
    vx_grid_3d = voxel_grid.VoxelGrid()
    vx_grid_3d.voxelize(points, 0.1, area_extents)

    # Filter out empty anchors here, using the 2D integral image
    start_time = time.time()
    empty_filter_2d = anchor_filter.get_empty_anchor_filter_2d(
        anchors=anchors,
        voxel_grid_2d=vx_grid_2d,
        density_threshold=1)
    anchors_2d = anchor_boxes_3d[empty_filter_2d]
    end_time = time.time()
    print("2D Anchors filtered in {} s".format(end_time - start_time))
    print("Number of 2D anchors remaining: %d" % (anchors_2d.shape[0]))

    unfiltered_anchors_2d = anchor_boxes_3d[np.logical_not(empty_filter_2d)]

    # 3D filtering
    start_time = time.time()
    empty_filter_3d = anchor_filter.get_empty_anchor_filter(
        anchors=anchors,
        voxel_grid_3d=vx_grid_3d,
        density_threshold=1)
    anchor_boxes_3d = anchor_boxes_3d[empty_filter_3d]
    end_time = time.time()
    print("3D Anchors filtered in {} s".format(end_time - start_time))
    print("Number of 3D anchors remaining: %d" % (anchor_boxes_3d.shape[0]))

    # Convert the filtered anchors to object labels for VTK visualization
    anchor_2d_objects = [
        box_3d_encoder.box_3d_to_object_label(anchor, 'Anchor')
        for anchor in anchors_2d
    ]

    anchor_3d_objects = [
        box_3d_encoder.box_3d_to_object_label(anchor, 'Anchor')
        for anchor in anchor_boxes_3d
    ]

    unfiltered_anchor_objects = [
        box_3d_encoder.box_3d_to_object_label(anchor, 'Anchor')
        for anchor in unfiltered_anchors_2d
    ]

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for boxes
    vtk_2d_boxes = VtkBoxes()
    vtk_2d_boxes.set_objects(anchor_2d_objects, anchor_2d_colour_scheme)

    vtk_3d_boxes = VtkBoxes()
    vtk_3d_boxes.set_objects(anchor_3d_objects, anchor_3d_colour_scheme)

    vtk_unfiltered_boxes = VtkBoxes()
    vtk_unfiltered_boxes.set_objects(unfiltered_anchor_objects,
                                     anchor_unfiltered_colour_scheme)

    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(vx_grid_3d)

    vtk_voxel_grid_2d = VtkVoxelGrid()
    vtk_voxel_grid_2d.set_voxels(vx_grid_2d)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_2d_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_3d_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_unfiltered_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid_2d.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)
    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_2d_boxes.vtk_actor,
            vtk_3d_boxes.vtk_actor,
            vtk_unfiltered_boxes.vtk_actor,
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
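
The demo above relies on anchor_filter.get_empty_anchor_filter_2d, which counts the points inside each anchor's footprint in constant time using an integral image (summed-area table) over the BEV occupancy grid. Below is a minimal, self-contained sketch of that idea; occupancy_integral_2d and count_in_box are illustrative helpers, not part of the MLOD API.

import numpy as np


def occupancy_integral_2d(grid):
    """Summed-area table of a 2D occupancy grid."""
    return np.cumsum(np.cumsum(grid, axis=0), axis=1)


def count_in_box(integral, x1, x2, z1, z2):
    """Occupied-cell count in the half-open box [x1, x2) x [z1, z2)."""
    total = integral[x2 - 1, z2 - 1]
    if x1 > 0:
        total -= integral[x1 - 1, z2 - 1]
    if z1 > 0:
        total -= integral[x2 - 1, z1 - 1]
    if x1 > 0 and z1 > 0:
        total += integral[x1 - 1, z1 - 1]
    return total


grid = np.zeros((4, 4), dtype=np.int32)
grid[1, 2] = grid[3, 2] = 1  # two occupied voxels
integral = occupancy_integral_2d(grid)

# An anchor covering rows 1-2, cols 2-3 contains a point, so it is kept
print(count_in_box(integral, 1, 3, 2, 4) >= 1)  # True
# An anchor covering rows 0-1, cols 0-1 is empty, so it is filtered out
print(count_in_box(integral, 0, 2, 0, 2) >= 1)  # False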
Code Example #2
    def load_samples(self, indices):
        """Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required; otherwise,
            samples should be provided by the next_batch function.

        Args:
            indices: A list of sample indices from dataset.sample_list
                to be loaded

        Returns:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_boxes_2d = np.zeros((1, 4))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            lidar_only = False
            num_views = 1

            if not lidar_only:
                # Load image (BGR -> RGB)
                cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
                rgb_image = cv_bgr_image[..., ::-1]
                image_shape = rgb_image.shape[0:2]

                # Append the depth channel
                if self.add_depth:
                    depth_map = obj_utils.get_depth_map(
                        img_idx, self.depth_dir)

                    # Set invalid (zero) pixels to the maximum depth
                    depth_map[depth_map == 0.0] = \
                        self.kitti_utils.bev_extents[1, 1]

                    # Add channel dimension to make stacking easier
                    depth_map = np.expand_dims(depth_map, 2)
                    image_input = np.concatenate([rgb_image, depth_map],
                                                 axis=2)
                else:
                    image_input = rgb_image
            else:
                image_shape = (370, 1224)

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)
            # Fixed-plane override (disabled):
            # ground_plane = np.array([0, -1, 0, 1.68])

            if lidar_only:
                # Hard-coded projection matrices for the multi-view
                # LiDAR-only mode (one 3x4 matrix per view)
                p_matrix = np.zeros((num_views, 3, 4), dtype=float)
                if num_views > 0:
                    p_matrix[0] = np.array(
                        [[8.39713500e+02, 3.58853400e+01,
                          4.48566750e+02, 2.31460650e+03],
                         [1.02835238e-13, 8.54979440e+02,
                          1.57320433e+02, 2.49655872e+03],
                         [0.00000000e+00, 7.97452000e-02,
                          9.96815000e-01, 5.14357000e+00]])
                # Guard with num_views > 1 so a single-view setup does not
                # index past the end of p_matrix
                if num_views > 1:
                    p_matrix[1] = np.array(
                        [[1.20171708e+03, 9.73326000e+01,
                          3.99933320e+02, 1.04945816e+04],
                         [1.41054657e+01, 8.65088160e+02,
                          8.46334690e+01, 5.24229862e+03],
                         [1.62221000e-01, 1.62221000e-01,
                          9.73329000e-01, 1.13555000e+01]])
            else:
                # Get calibration
                stereo_calib_p2 = calib_utils.read_calibration(
                    self.calib_dir, int(sample_name)).p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)
            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                if not lidar_only:
                    image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                if obj_labels is not None:
                    obj_labels = [
                        kitti_aug.flip_label(obj, image_shape)
                        for obj in obj_labels
                    ]

                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                if lidar_only:
                    for i in range(num_views):
                        p_matrix[i] = kitti_aug.flip_stereo_calib_p2(
                            p_matrix[i], image_shape)
                else:
                    stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                        stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if (kitti_aug.AUG_PCA_JITTER in sample.augs) and not lidar_only:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3], aug_img_noise=self.aug_img_noise)

            # Augmentation (Random Occlusion)
            if kitti_aug.AUG_RANDOM_OCC in sample.augs:
                point_cloud = kitti_aug.occ_aug(point_cloud, stereo_calib_p2,
                                                obj_labels)

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_boxes_2d = np.asarray([
                    box_3d_encoder.object_label_to_box_2d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                        label_boxes_2d = np.asarray([[-1.0, -1.0, -1.0, -1.0]])
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                        label_boxes_2d = np.zeros((1, 4))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)
            height_maps = bev_images.get('height_maps')

            # Optional BEV random masking (disabled): with probability 0.5,
            # zero out one randomly chosen height map layer
            # bev_drop_p = 0.5
            # if random.random() > bev_drop_p:
            #     mask_idx = random.randint(0, 4)
            #     height_maps[mask_idx] = np.zeros(height_maps[0].shape,
            #                                      dtype=np.float32)

            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            point_cloud = self.kitti_utils._apply_slice_filter(
                point_cloud, ground_plane).T

            if lidar_only:
                depth_map = np.zeros(
                    (num_views, image_shape[0], image_shape[1]), dtype=float)
                for i in range(num_views):
                    depth_map[i, :, :] = project_depths(
                        point_cloud, p_matrix[i], image_shape[0:2])
                depth_map_expand_dims = np.expand_dims(depth_map, axis=-1)
                sample_dict = {
                    constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                    constants.KEY_LABEL_ANCHORS: label_anchors,
                    constants.KEY_LABEL_CLASSES: label_classes,
                    constants.KEY_IMAGE_INPUT: depth_map_expand_dims,
                    constants.KEY_BEV_INPUT: bev_input,
                    constants.KEY_ANCHORS_INFO: anchors_info,
                    constants.KEY_POINT_CLOUD: point_cloud,
                    constants.KEY_GROUND_PLANE: ground_plane,
                    constants.KEY_STEREO_CALIB_P2: p_matrix[0:num_views],
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_SAMPLE_AUGS: sample.augs,
                    constants.KEY_DPT_INPUT: depth_map
                }
            else:
                depth_map = project_depths(point_cloud, stereo_calib_p2,
                                           image_shape[0:2])
                depth_map = np.expand_dims(depth_map, axis=0)
                sample_dict = {
                    constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                    constants.KEY_LABEL_BOXES_2D: label_boxes_2d,
                    constants.KEY_LABEL_ANCHORS: label_anchors,
                    constants.KEY_LABEL_CLASSES: label_classes,
                    constants.KEY_IMAGE_INPUT: image_input,
                    constants.KEY_BEV_INPUT: bev_input,
                    constants.KEY_ANCHORS_INFO: anchors_info,
                    constants.KEY_POINT_CLOUD: point_cloud,
                    constants.KEY_GROUND_PLANE: ground_plane,
                    constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                    constants.KEY_SAMPLE_NAME: sample_name,
                    constants.KEY_SAMPLE_AUGS: sample.augs,
                    constants.KEY_DPT_INPUT: depth_map
                }

            sample_dicts.append(sample_dict)

        return sample_dicts
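
A brief usage sketch (illustrative; it assumes load_samples is a method of the dataset object built with DatasetBuilder as in Code Example #1, and that the constants module shown above is in scope):

sample_dicts = dataset.load_samples([0])
sample_dict = sample_dicts[0]
print(sample_dict[constants.KEY_SAMPLE_NAME])           # e.g. '000000'
print(sample_dict[constants.KEY_BEV_INPUT].shape)       # (H, W, channels)
print(sample_dict[constants.KEY_LABEL_BOXES_3D].shape)  # (num_labels, 7)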
Code Example #3
    def test_get_empty_anchor_filter_in_2d(self):
        area_extent = [(0., 2.), (-1., 0.), (0., 2.)]

        # Creates a voxel grid with the following occupancy at the y bin
        # containing y = -0.5 ([x] marks an occupied voxel):
        # [ ][ ][ ][ ]
        # [ ][ ][x][ ]
        # [ ][ ][ ][ ]
        # [ ][ ][x][ ]
        pts = np.array([[0.51, -0.5, 1.1], [1.51, -0.5, 1.1]])

        voxel_size = 0.5
        voxel_grid = VoxelGrid()
        voxel_grid.voxelize(pts, voxel_size, extents=area_extent)

        # Define anchors to test
        boxes_3d = np.array([
            [0.51, 0, 0.51, 1, 1, 1, 0],
            [0.51, 0, 0.51, 1, 1, 1, np.pi / 2.],
            [0.51, 0, 1.1, 1, 1, 1, 0],
            [0.51, 0, 1.1, 1, 1, 1, np.pi / 2.],
            [1.51, 0, 0.51, 1, 1, 1, 0],
            [1.51, 0, 0.51, 1, 1, 1, np.pi / 2.],
            [1.51, 0, 1.1, 1, 1, 1, 0],
            [1.51, 0, 1.1, 1, 1, 1, np.pi / 2.],
        ])

        anchors = box_3d_encoder.box_3d_to_anchor(boxes_3d)

        # Test anchor locations: each marked cell holds two anchors
        # (ry = 0 and ry = pi/2); the number shown is the odd index of
        # the pair
        # [ ][ ][ ][ ]
        # [ ][1][3][ ]
        # [ ][ ][ ][ ]
        # [ ][5][7][ ]

        gen_filter = anchor_filter.get_empty_anchor_filter(anchors,
                                                           voxel_grid,
                                                           density_threshold=1)

        expected_filter = np.array(
            [False, False, True, True, False, False, True, True])

        self.assertTrue((gen_filter == expected_filter).all())

        boxes_3d = np.array([
            [0.5, 0, 0.5, 2, 1, 1, 0],  # case 1
            [0.5, 0, 0.5, 2, 1, 1, np.pi / 2.],
            [0.5, 0, 1.5, 1, 2, 1, 0],  # case 2
            [0.5, 0, 1.5, 1, 2, 1, np.pi / 2.],
            [1.5, 0, 0.5, 2, 1, 1, 0],  # case 3
            [1.5, 0, 0.5, 2, 1, 1, np.pi / 2.],
            [1.5, 0, 1.5, 1, 2, 1, 0],  # case 4
            [1.5, 0, 1.5, 1, 2, 1, np.pi / 2.]
        ])

        anchors = box_3d_encoder.box_3d_to_anchor(boxes_3d)

        # case 1
        # [ ][ ][ ][ ]   [ ][ ][ ][ ]
        # [ ][o][ ][ ]   [ ][o][o][ ]
        # [ ][o][ ][ ]   [ ][ ][ ][ ]
        # [ ][ ][ ][ ]   [ ][ ][ ][ ]

        # case 2
        # [ ][ ][ ][ ]   [ ][ ][ ][ ]
        # [ ][ ][o][o]   [ ][ ][o][ ]
        # [ ][ ][ ][ ]   [ ][ ][o][ ]
        # [ ][ ][ ][ ]   [ ][ ][ ][ ]

        # case 3
        # [ ][ ][ ][ ]   [ ][ ][ ][ ]
        # [ ][ ][ ][ ]   [ ][ ][ ][ ]
        # [ ][o][ ][ ]   [ ][o][o][ ]
        # [ ][o][ ][ ]   [ ][ ][ ][ ]

        # case 4
        # [ ][ ][ ][ ]   [ ][ ][ ][ ]
        # [ ][ ][ ][ ]   [ ][ ][ ][ ]
        # [ ][ ][o][o]   [ ][ ][o][ ]
        # [ ][ ][ ][ ]   [ ][ ][o][ ]

        gen_filter = anchor_filter.get_empty_anchor_filter(anchors,
                                                           voxel_grid,
                                                           density_threshold=1)
        expected_filter = np.array(
            [False, True, True, True, False, True, True, True])

        self.assertTrue((gen_filter == expected_filter).all())
Code Example #4
File: box_8c_encoder.py  Project: songsanling/MLOD
import numpy as np

# Assumed project-local imports, following the MLOD layout; adjust as needed
from mlod.core import box_3d_encoder, format_checker


def np_box_3d_to_box_8c(box_3d):
    """Computes the 3D bounding box corner positions from box_3d format.

    This function does not preserve corner order; instead, the box is
    rotated to the nearest 90 degree angle, which simplifies finding the
    closest corner-to-corner match when comparing against ground-truth
    boxes.

    Args:
        box_3d: ndarray of size (7,) representing box_3d in the format
            [x, y, z, l, w, h, ry]
    Returns:
        corners_3d: An ndarray of shape (3, 8) representing the box as
            corners in the format [[x1,...,x8], [y1,...,y8], [z1,...,z8]].
    """

    format_checker.check_box_3d_format(box_3d)

    # This function is vectorized and returns an ndarray
    anchor = box_3d_encoder.box_3d_to_anchor(box_3d, ortho_rotate=True)[0]

    centroid_x = anchor[0]
    centroid_y = anchor[1]
    centroid_z = anchor[2]
    dim_x = anchor[3]
    dim_y = anchor[4]
    dim_z = anchor[5]

    half_dim_x = dim_x / 2
    half_dim_z = dim_z / 2

    # 3D BB corners
    x_corners = np.array([
        half_dim_x, half_dim_x, -half_dim_x, -half_dim_x, half_dim_x,
        half_dim_x, -half_dim_x, -half_dim_x
    ])

    y_corners = np.array([0.0, 0.0, 0.0, 0.0, -dim_y, -dim_y, -dim_y, -dim_y])

    z_corners = np.array([
        half_dim_z, -half_dim_z, -half_dim_z, half_dim_z, half_dim_z,
        -half_dim_z, -half_dim_z, half_dim_z
    ])

    ry = box_3d[6]

    # Find nearest 90 degree
    half_pi = np.pi / 2
    ortho_ry = np.round(ry / half_pi) * half_pi

    # Find rotation to make the box ortho aligned
    ry_diff = ry - ortho_ry

    # Compute transform matrix
    # This includes rotation and translation
    rot = np.array([
        [np.cos(ry_diff), 0, np.sin(ry_diff), centroid_x],
        [0, 1, 0, centroid_y],
        [-np.sin(ry_diff), 0, np.cos(ry_diff), centroid_z]
    ])

    # Create a ones row
    ones_row = np.ones(x_corners.shape)

    # Stack into homogeneous (4, 8) points and apply the (3, 4) transform;
    # the result is already the (3, 8) corner array
    box_8c = np.dot(rot, np.array([x_corners, y_corners, z_corners,
                                   ones_row]))

    return box_8c
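
A quick worked example (a sketch, assuming box_3d_to_anchor maps l, w, h to dim_x, dim_z, dim_y as in the MLOD anchor format): with ry = 0 the rotation term vanishes, so the corners are simply the centroid offset by the half-dimensions, and y spans [y - h, y] because y is the bottom face in box_3d format.

box_3d = np.array([1.0, 2.0, 3.0, 4.0, 2.0, 1.5, 0.0])  # x, y, z, l, w, h, ry
corners = np_box_3d_to_box_8c(box_3d)
print(sorted(set(corners[0])))  # x values: [-1.0, 3.0] = 1 -/+ l/2
print(sorted(set(corners[1])))  # y values: [0.5, 2.0]  = {2 - h, 2}
print(sorted(set(corners[2])))  # z values: [2.0, 4.0]  = 3 -/+ w/2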
Code Example #5
File: anchor_filter_vis.py  Project: songsanling/MLOD
def main():
    """
    Visualization of anchor filtering using 3D integral images
    """

    anchor_colour_scheme = {
        "Car": (0, 255, 0),  # Green
        "Pedestrian": (255, 150, 50),  # Orange
        "Cyclist": (150, 50, 100),  # Purple
        "DontCare": (255, 0, 0),  # Red
        "Anchor": (0, 0, 255),  # Blue
    }

    # Create Dataset
    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    # Options
    clusters, _ = dataset.get_cluster_info()
    sample_name = "000000"
    img_idx = int(sample_name)
    anchor_stride = [0.5, 0.5]
    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator(
        anchor_3d_sizes=clusters, anchor_stride=anchor_stride)

    area_extents = np.array([[-40, 40], [-5, 3], [0, 70]])

    # Generate anchors in box_3d format
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(area_3d=area_extents,
                                                   ground_plane=ground_plane)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    point_cloud = obj_utils.get_lidar_point_cloud(img_idx, dataset.calib_dir,
                                                  dataset.velo_dir)

    offset_dist = 2.0

    # Keep points inside the area extents and within offset_dist (2.0 m)
    # of the ground plane
    offset_filter = obj_utils.get_point_filter(point_cloud, area_extents,
                                               ground_plane, offset_dist)

    # Keep points within 0.1 m of the road plane
    road_filter = obj_utils.get_point_filter(point_cloud, area_extents,
                                             ground_plane, 0.1)

    # XOR keeps the slice of points between 0.1 m and 2.0 m above the plane
    slice_filter = np.logical_xor(offset_filter, road_filter)
    point_cloud = point_cloud.T[slice_filter]

    # Generate Voxel Grid
    vx_grid_3d = voxel_grid.VoxelGrid()
    vx_grid_3d.voxelize(point_cloud, 0.1, area_extents)

    # Anchors in anchor format
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)

    # Filter out empty anchors here, using the 3D integral image
    start_time = time.time()
    empty_filter = \
        anchor_filter.get_empty_anchor_filter(anchors=all_anchors,
                                              voxel_grid_3d=vx_grid_3d,
                                              density_threshold=1)
    anchor_boxes_3d = anchor_boxes_3d[empty_filter]
    end_time = time.time()
    print("Anchors filtered in {} s".format(end_time - start_time))

    # Visualize GT boxes
    # Grab ground truth
    ground_truth_list = obj_utils.read_labels(dataset.label_dir, img_idx)

    # Visualize from here
    vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(sample_name)
    image_shape = np.array(Image.open(image_path)).shape
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors, dataset,
                                                image_shape, img_idx)

    # Overlay boxes on images
    anchor_objects = []
    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor_box_3d = anchor_boxes_3d[anchor_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(
            anchor_box_3d, 'Anchor')
        # Append to a list for visualization in VTK later
        anchor_objects.append(obj_label)

    for idx in range(len(ground_truth_list)):
        ground_truth_obj = ground_truth_list[idx]
        # Append to a list for visualization in VTK later
        anchor_objects.append(ground_truth_obj)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for boxes
    vtk_boxes = VtkBoxes()
    vtk_boxes.set_objects(anchor_objects, anchor_colour_scheme)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(point_cloud)

    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(vx_grid_3d)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_boxes.vtk_actor)
    # vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vtk.vtkInteractorStyleTrackballCamera())

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()  # Blocking
Code Example #6
def main():
    """This demo shows RPN proposals and MLOD predictions in 3D
    and 2D in image space. Given certain thresholds for proposals
    and predictions, it selects and draws the bounding boxes on
    the image sample. It goes through the entire proposal and
    prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    rpn_score_threshold = 0.1
    mlod_score_threshold = 0.1

    # Flag for projecting the 3D boxes to image space
    # in tensor format (for testing purposes)
    test_img_tensor_projection = False

    gt_classes = ['Pedestrian', 'Cyclist']

    # Overwrite this to select a specific checkpoint
    global_step = 44000
    checkpoint_name = 'mlod_fpn_people'

    # Drawing Toggles
    draw_proposals_separate = False
    draw_overlaid = False
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = False
    draw_orientations_on_pred = False

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = False

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = False
    iou_3d = False
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # Setup Paths
    predictions_dir = mlod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            dataset.data_split, global_step, rpn_score_threshold)

        if not os.path.exists(prop_out_dir):
            os.makedirs(prop_out_dir)

        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            dataset.data_split, global_step, mlod_score_threshold)

        if not os.path.exists(overlaid_out_dir):
            os.makedirs(overlaid_out_dir)

        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            dataset.data_split, global_step,
            mlod_score_threshold)

        if not os.path.exists(pred_out_dir):
            os.makedirs(pred_out_dir)

        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    for sample_idx in range(dataset.num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = dataset.num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s'.format(
                             sample_idx + 1,
                             dataset.num_samples,
                             avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Proposals
        ##############################
        if draw_proposals_separate or draw_overlaid:
            # Load proposals from files
            proposals_file_path = proposals_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(proposals_file_path):
                print('Sample {}: No proposals, skipping'.format(sample_name))
                continue
            print('Sample {}: Drawing proposals'.format(sample_name))

            proposals_and_scores = np.loadtxt(proposals_file_path)

            proposal_boxes_3d = proposals_and_scores[:, 0:7]
            proposal_scores = proposals_and_scores[:, 7]

            # Apply score mask to proposals
            score_mask = proposal_scores > rpn_score_threshold
            proposal_boxes_3d = proposal_boxes_3d[score_mask]
            proposal_scores = proposal_scores[score_mask]

            proposal_objs = \
                [box_3d_encoder.box_3d_to_object_label(proposal,
                                                       obj_type='Proposal')
                 for proposal in proposal_boxes_3d]

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step,
                                    sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from file
            predictions_and_scores = np.loadtxt(predictions_file_path)

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]

            # Process predictions only if the sample has any to begin with
            if len(prediction_boxes_3d) > 0:

                # Apply score mask
                mlod_score_mask = prediction_scores >= mlod_score_threshold
                mlod_show_mask = mlod_score_mask

                prediction_boxes_3d = prediction_boxes_3d[mlod_show_mask]
                prediction_scores = prediction_scores[mlod_show_mask]
                prediction_class_indices = \
                    prediction_class_indices[mlod_show_mask]

                # # Swap l, w for predictions where w > l
                # swapped_indices = \
                #     prediction_boxes_3d[:, 4] > prediction_boxes_3d[:, 3]
                # prediction_boxes_3d = np.copy(prediction_boxes_3d)
                # prediction_boxes_3d[swapped_indices, 3] = \
                #     prediction_boxes_3d[swapped_indices, 4]
                # prediction_boxes_3d[swapped_indices, 4] = \
                #     prediction_boxes_3d[swapped_indices, 3]

        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels, if the dataset split provides them
        if dataset.has_labels:
            gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
        else:
            gt_objects = []

        # Filter objects to desired difficulty
        filtered_gt_objs = dataset.kitti_utils.filter_labels(
            gt_objects, classes=gt_classes)

        boxes2d, _, _ = obj_utils.build_bbs_from_objects(
            filtered_gt_objs, class_needed=gt_classes)

        image_path = dataset.get_rgb_image_path(sample_name)
        image = Image.open(image_path)
        image_size = image.size

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(dataset.calib_dir,
                                                    img_idx)
        calib_p2 = stereo_calib.p2

        ##############################
        # Reformat and prepare to draw
        ##############################
        if draw_proposals_separate or draw_overlaid:
            proposals_as_anchors = box_3d_encoder.box_3d_to_anchor(
                proposal_boxes_3d)

            if test_img_tensor_projection:
                proposal_boxes = demo_utils.tf_project_to_image_space(
                    proposals_as_anchors, calib_p2, image_size, img_idx)

            else:
                proposal_boxes, _ = anchor_projector.project_to_image_space(
                    proposals_as_anchors, calib_p2, image_size)

            num_of_proposals = proposal_boxes_3d.shape[0]

            prop_fig, prop_2d_axes, prop_3d_axes = \
                vis_utils.visualization(dataset.rgb_image_dir,
                                        img_idx,
                                        display=False)

            draw_proposals(filtered_gt_objs,
                           calib_p2,
                           num_of_proposals,
                           proposal_objs,
                           proposal_boxes,
                           prop_2d_axes,
                           prop_3d_axes,
                           draw_orientations_on_prop)

            if draw_proposals_separate:
                # Save just the proposals
                filename = prop_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                if not draw_overlaid:
                    plt.close(prop_fig)

        if draw_overlaid or draw_predictions_separate:
            if len(prediction_boxes_3d) > 0:
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for i in range(len(prediction_boxes_3d)):
                    box_3d = prediction_boxes_3d[i, 0:7]
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d, calib_p2,
                        truncate=True, image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score
            else:
                if save_empty_images:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)
                    filename = pred_out_dir + '/' + sample_name + '.png'
                    plt.savefig(filename)
                    plt.close(pred_fig)
                continue

            if draw_overlaid:
                # Overlay prediction boxes on image
                draw_predictions(filtered_gt_objs,
                                 calib_p2,
                                 num_of_predictions,
                                 final_prediction_objs,
                                 final_class_indices,
                                 final_boxes_2d,
                                 prop_2d_axes,
                                 prop_3d_axes,
                                 draw_score,
                                 draw_iou,
                                 gt_classes,
                                 draw_orientations_on_pred,
                                 iou_3d)
                filename = overlaid_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                plt.close(prop_fig)

            if draw_predictions_separate:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions(filtered_gt_objs,
                                     calib_p2,
                                     num_of_predictions,
                                     final_prediction_objs,
                                     final_class_indices,
                                     final_boxes_2d,
                                     pred_2d_axes,
                                     pred_3d_axes,
                                     draw_score,
                                     draw_iou,
                                     gt_classes,
                                     draw_orientations_on_pred,
                                     iou_3d)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions(filtered_gt_objs,
                                        calib_p2,
                                        num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices,
                                        final_boxes_2d,
                                        pred_3d_axes,
                                        draw_score,
                                        draw_iou,
                                        gt_classes,
                                        draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')
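
The 2D boxes drawn above come from projecting each 3D box into image space with the camera P2 matrix. A minimal sketch of that projection follows (illustrative only; the project's box_3d_projector.project_to_image_space additionally truncates boxes to the image bounds):

import numpy as np


def project_corners_to_2d_box(corners_3d, p2):
    """Project (3, N) camera-frame corners with a (3, 4) projection
    matrix and return the tight 2D box [x1, y1, x2, y2] in pixels."""
    pts_homo = np.vstack([corners_3d, np.ones((1, corners_3d.shape[1]))])
    uvw = p2 @ pts_homo
    uv = uvw[:2] / uvw[2]  # perspective divide
    return np.array([uv[0].min(), uv[1].min(), uv[0].max(), uv[1].max()])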
Code Example #7
File: box_4c_encoder.py  Project: songsanling/MLOD
import numpy as np

# Assumed project-local imports, following the MLOD layout; adjust as needed
from mlod.core import box_3d_encoder, format_checker
from wavedata.tools.core import geometry_utils


def np_box_3d_to_box_4c(box_3d, ground_plane):
    """Converts a single box_3d to box_4c format.

    Args:
        box_3d: box_3d (7,) in the format [x, y, z, l, w, h, ry]
        ground_plane: ground plane coefficients (4,)

    Returns:
        box_4c (10,) in the format [x1, x2, x3, x4, z1, z2, z3, z4, h1, h2],
            where (xi, zi) are the four BEV corners and h1, h2 are the
            heights of the bottom and top faces off the ground plane
    """
    format_checker.check_box_3d_format(box_3d)

    anchor = box_3d_encoder.box_3d_to_anchor(box_3d, ortho_rotate=True)[0]

    centroid_x = anchor[0]
    centroid_y = anchor[1]
    centroid_z = anchor[2]
    dim_x = anchor[3]
    dim_y = anchor[4]
    dim_z = anchor[5]

    # Create temporary box at (0, 0) for rotation
    half_dim_x = dim_x / 2
    half_dim_z = dim_z / 2

    # Box corners
    x_corners = np.asarray([half_dim_x, half_dim_x,
                            -half_dim_x, -half_dim_x])

    z_corners = np.array([half_dim_z, -half_dim_z,
                          -half_dim_z, half_dim_z])

    ry = box_3d[6]

    # Find nearest 90 degree
    half_pi = np.pi / 2
    ortho_ry = np.round(ry / half_pi) * half_pi

    # Find rotation to make the box ortho aligned
    ry_diff = ry - ortho_ry

    # Create transformation matrix, including rotation and translation
    tr_mat = np.array([[np.cos(ry_diff), np.sin(ry_diff), centroid_x],
                       [-np.sin(ry_diff), np.cos(ry_diff), centroid_z],
                       [0, 0, 1]])

    # Create a ones row
    ones_row = np.ones(x_corners.shape)

    # Append the ones row so the 3x3 transform can be applied
    points_stacked = np.vstack([x_corners, z_corners, ones_row])
    corners = np.matmul(tr_mat, points_stacked)

    # Discard the last row (ones)
    corners = corners[0:2]

    # Calculate height off ground plane
    ground_y = geometry_utils.calculate_plane_point(
        ground_plane, [centroid_x, None, centroid_z])[1]
    h1 = ground_y - centroid_y
    h2 = h1 + dim_y

    # Stack into (10,) ndarray
    box_4c = np.hstack([corners.flatten(), h1, h2])
    return box_4c
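
A quick sanity check (a sketch, using the camera frame where the plane [0, -1, 0, 1.72] satisfies -y + 1.72 = 0, i.e. ground_y = 1.72): a box resting on that plane with ry = 0 yields h1 = 0 and h2 = h, and the BEV corners are the centroid offset by l/2 and w/2.

ground_plane = np.array([0., -1., 0., 1.72])
box_3d = np.array([0.0, 1.72, 10.0, 4.0, 2.0, 1.5, 0.0])  # x, y, z, l, w, h, ry
box_4c = np_box_3d_to_box_4c(box_3d, ground_plane)
print(box_4c)
# [ 2.   2.  -2.  -2.  11.   9.   9.  11.   0.   1.5]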
Code Example #8
def main():
    """
    This demo shows example mini batch info for full MlodModel training.
        This includes ground truth, ortho rotated ground truth,
        negative proposal anchors, positive proposal anchors, and a sampled
        mini batch.

        The 2D IoU thresholds can be modified to show the effect of
        changing the IoU range used for mini batch sampling.

        In order to let this demo run without training an RPN, the proposals
        shown are being read from a text file.

    Keys:
        F1: Toggle ground truth
        F2: Toggle ortho rotated ground truth
        F3: Toggle negative proposal anchors
        F4: Toggle positive proposal anchors
        F5: Toggle mini batch anchors
    """

    ##############################
    #  Options
    ##############################
    # Config file folder, default (<mlod_root>/data/outputs/<checkpoint_name>)
    config_dir = None

    # checkpoint_name = None
    checkpoint_name = 'mlod_exp_example'
    data_split = 'val_half'

    # global_step = None
    global_step = 100000

    # # # Cars # # #
    # sample_name = "000050"
    sample_name = "000104"
    # sample_name = "000764"

    # # # People # # #
    # val_half
    # sample_name = '000001'  # Hard, 1 far cyc
    # sample_name = '000005'  # Easy, 1 ped
    # sample_name = '000122'  # Easy, 1 cyc
    # sample_name = '000134'  # Hard, lots of people
    # sample_name = '000167'  # Medium, 1 ped, 2 cycs
    # sample_name = '000187'  # Medium, 1 ped on left
    # sample_name = '000381'  # Easy, 1 ped
    # sample_name = '000398'  # Easy, 1 ped
    # sample_name = '000401'  # Hard, obscured peds
    # sample_name = '000407'  # Easy, 1 ped
    # sample_name = '000448'  # Hard, several far people
    # sample_name = '000486'  # Hard 2 obscured peds
    # sample_name = '000509'  # Easy, 1 ped
    # sample_name = '000718'  # Hard, lots of people
    # sample_name = '002216'  # Easy, 1 cyc

    mini_batch_size = 512
    neg_proposal_2d_iou_hi = 0.6
    pos_proposal_2d_iou_lo = 0.65

    bkg_proposals_line_width = 0.5
    neg_proposals_line_width = 0.5
    mid_proposals_line_width = 0.5
    pos_proposals_line_width = 1.0

    ##############################
    # End of Options
    ##############################

    img_idx = int(sample_name)
    print("Showing mini batch for sample {}".format(sample_name))

    # Read proposals from file
    if checkpoint_name is None:
        # Use VAL Dataset
        dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_VAL)

        # Load demo proposals
        proposals_and_scores_dir = mlod.top_dir() + \
            '/demos/data/predictions/' + checkpoint_name + \
            '/proposals_and_scores/' + dataset.data_split
    else:
        if config_dir is None:
            config_dir = mlod.root_dir() + '/data/outputs/' + checkpoint_name

        # Parse experiment config
        pipeline_config_file = \
            config_dir + '/' + checkpoint_name + '.config'
        _, _, _, dataset_config = \
            config_builder_util.get_configs_from_pipeline_file(
                pipeline_config_file, is_training=False)

        dataset_config.data_split = data_split
        dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                     use_defaults=False)

        # Overwrite
        mini_batch_utils = dataset.kitti_utils.mini_batch_utils
        mini_batch_utils.mlod_neg_iou_range[1] = neg_proposal_2d_iou_hi
        mini_batch_utils.mlod_pos_iou_range[0] = pos_proposal_2d_iou_lo

        # Load proposals from outputs folder
        proposals_and_scores_dir = mlod.root_dir() + \
            '/data/outputs/' + checkpoint_name + \
            '/predictions/proposals_and_scores/' + dataset.data_split

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    proposals_and_scores = np.loadtxt(
        proposals_and_scores_dir +
        "/{}/{}.txt".format(global_step, sample_name))
    proposal_boxes_3d = proposals_and_scores[:, 0:7]
    proposal_anchors = box_3d_encoder.box_3d_to_anchor(proposal_boxes_3d)

    # Get filtered ground truth
    obj_labels = obj_utils.read_labels(dataset.label_dir, img_idx)
    filtered_objs = dataset.kitti_utils.filter_labels(obj_labels)

    # Convert ground truth to anchors
    gt_boxes_3d = np.asarray([
        box_3d_encoder.object_label_to_box_3d(obj_label)
        for obj_label in filtered_objs
    ])
    gt_anchors = box_3d_encoder.box_3d_to_anchor(gt_boxes_3d,
                                                 ortho_rotate=True)

    # Ortho rotate ground truth
    gt_ortho_boxes_3d = box_3d_encoder.anchors_to_box_3d(gt_anchors)
    gt_ortho_objs = [
        box_3d_encoder.box_3d_to_object_label(box_3d, obj_type='OrthoGt')
        for box_3d in gt_ortho_boxes_3d
    ]

    # Project gt and anchors into BEV
    gt_bev_anchors, _ = \
        anchor_projector.project_to_bev(gt_anchors,
                                        dataset.kitti_utils.bev_extents)
    bev_anchors, _ = \
        anchor_projector.project_to_bev(proposal_anchors,
                                        dataset.kitti_utils.bev_extents)

    # Reorder boxes into (y1, x1, y2, x2) order
    gt_bev_anchors_tf_order = anchor_projector.reorder_projected_boxes(
        gt_bev_anchors)
    bev_anchors_tf_order = anchor_projector.reorder_projected_boxes(
        bev_anchors)

    # Convert to box_list format for iou calculation
    gt_anchor_box_list = box_list.BoxList(
        tf.cast(gt_bev_anchors_tf_order, tf.float32))
    anchor_box_list = box_list.BoxList(
        tf.cast(bev_anchors_tf_order, tf.float32))

    # Get IoU for every anchor
    tf_all_ious = box_list_ops.iou(gt_anchor_box_list, anchor_box_list)
    valid_ious = True
    # Make sure the calculated IoUs contain values. Since it is an [N, M]
    # tensor, if there are no gt boxes, for instance, that dimension is zero.
    if tf_all_ious.shape[0] == 0 or tf_all_ious.shape[1] == 0:
        print('#################################################')
        print('Warning: This sample does not contain valid IoUs')
        print('#################################################')
        valid_ious = False

    if valid_ious:
        tf_max_ious = tf.reduce_max(tf_all_ious, axis=0)
        tf_max_iou_indices = tf.argmax(tf_all_ious, axis=0)

        # Sample an RPN mini batch from the non empty anchors
        mini_batch_utils = dataset.kitti_utils.mini_batch_utils

        # Overwrite mini batch size and sample a mini batch
        mini_batch_utils.mlod_mini_batch_size = mini_batch_size
        mb_mask_tf, _ = mini_batch_utils.sample_mlod_mini_batch(tf_max_ious)

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        # Run the graph to calculate ious for every proposal and
        # to get the mini batch mask
        all_ious, max_ious, max_iou_indices = sess.run(
            [tf_all_ious, tf_max_ious, tf_max_iou_indices])
        mb_mask = sess.run(mb_mask_tf)

        mb_anchors = proposal_anchors[mb_mask]
        mb_anchor_boxes_3d = box_3d_encoder.anchors_to_box_3d(mb_anchors)
        mb_anchor_ious = max_ious[mb_mask]

    else:
        # No valid IoUs, so assume all IoUs are zero and let the
        # mini-batch contain all the anchors, since we cannot mask
        # without IoUs
        max_ious = np.zeros(proposal_boxes_3d.shape[0])
        mb_anchor_ious = max_ious
        mb_anchors = proposal_anchors
        mb_anchor_boxes_3d = box_3d_encoder.anchors_to_box_3d(mb_anchors)

    # Create list of positive/negative proposals based on iou
    pos_proposal_objs = []
    mid_proposal_objs = []
    neg_proposal_objs = []
    bkg_proposal_objs = []
    for i in range(len(proposal_boxes_3d)):
        box_3d = proposal_boxes_3d[i]

        if max_ious[i] == 0.0:
            # Background proposals
            bkg_proposal_objs.append(
                box_3d_encoder.box_3d_to_object_label(
                    box_3d, obj_type='BackgroundProposal'))

        elif max_ious[i] < neg_proposal_2d_iou_hi:
            # Negative proposals
            neg_proposal_objs.append(
                box_3d_encoder.box_3d_to_object_label(
                    box_3d, obj_type='NegativeProposal'))

        elif max_ious[i] < pos_proposal_2d_iou_lo:
            # Middle proposals (in between negative and positive)
            mid_proposal_objs.append(
                box_3d_encoder.box_3d_to_object_label(
                    box_3d, obj_type='MiddleProposal'))

        elif max_ious[i] <= 1.0:
            # Positive proposals
            pos_proposal_objs.append(
                box_3d_encoder.box_3d_to_object_label(
                    box_3d, obj_type='PositiveProposal'))

        else:
            raise ValueError('Invalid IoU > 1.0')

    print('{} bkg, {} neg, {} mid, {} pos proposals:'.format(
        len(bkg_proposal_objs), len(neg_proposal_objs), len(mid_proposal_objs),
        len(pos_proposal_objs)))

    # Convert the mini_batch anchors to object list
    mb_obj_list = []
    for i in range(len(mb_anchor_ious)):
        if valid_ious and (mb_anchor_ious[i] >
                           mini_batch_utils.mlod_pos_iou_range[0]):
            obj_type = "Positive"
        else:
            obj_type = "Negative"

        obj = box_3d_encoder.box_3d_to_object_label(mb_anchor_boxes_3d[i],
                                                    obj_type)
        mb_obj_list.append(obj)

    # Point cloud
    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    points, point_colours = demo_utils.get_filtered_pc_and_colours(
        dataset, image, img_idx)

    # Visualize from here
    vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    # VtkPointCloud
    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    # VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # VtkBoxes for ground truth
    vtk_gt_boxes = VtkBoxes()
    vtk_gt_boxes.set_objects(filtered_objs, COLOUR_SCHEME)

    # VtkBoxes for ortho ground truth
    vtk_gt_ortho_boxes = VtkBoxes()
    vtk_gt_ortho_boxes.set_objects(gt_ortho_objs, COLOUR_SCHEME)

    # VtkBoxes for background proposals
    vtk_bkg_proposal_boxes = VtkBoxes()
    vtk_bkg_proposal_boxes.set_objects(bkg_proposal_objs, COLOUR_SCHEME)
    vtk_bkg_proposal_boxes.set_line_width(bkg_proposals_line_width)

    # VtkBoxes for negative proposals
    vtk_neg_proposal_boxes = VtkBoxes()
    vtk_neg_proposal_boxes.set_objects(neg_proposal_objs, COLOUR_SCHEME)
    vtk_neg_proposal_boxes.set_line_width(neg_proposals_line_width)

    # VtkBoxes for middle proposals
    vtk_mid_proposal_boxes = VtkBoxes()
    vtk_mid_proposal_boxes.set_objects(mid_proposal_objs, COLOUR_SCHEME)
    vtk_mid_proposal_boxes.set_line_width(mid_proposals_line_width)

    # VtkBoxes for positive proposals
    vtk_pos_proposal_boxes = VtkBoxes()
    vtk_pos_proposal_boxes.set_objects(pos_proposal_objs, COLOUR_SCHEME)
    vtk_pos_proposal_boxes.set_line_width(pos_proposals_line_width)

    # Create VtkBoxes for mini batch anchors
    vtk_mb_boxes = VtkBoxes()
    vtk_mb_boxes.set_objects(mb_obj_list, COLOUR_SCHEME)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Add actors
    vtk_renderer.AddActor(axes)
    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)

    vtk_renderer.AddActor(vtk_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_gt_ortho_boxes.vtk_actor)

    vtk_renderer.AddActor(vtk_bkg_proposal_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_neg_proposal_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_mid_proposal_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_pos_proposal_boxes.vtk_actor)

    vtk_renderer.AddActor(vtk_mb_boxes.vtk_actor)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("MLOD Mini Batch")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_gt_boxes.vtk_actor,
            vtk_gt_ortho_boxes.vtk_actor,
            vtk_bkg_proposal_boxes.vtk_actor,
            vtk_neg_proposal_boxes.vtk_actor,
            vtk_mid_proposal_boxes.vtk_actor,
            vtk_pos_proposal_boxes.vtk_actor,
            vtk_mb_boxes.vtk_actor,
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
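
The loop above bins each proposal by its highest ground-truth IoU. As a
minimal sketch (not part of the demo), the same binning can be done in one
vectorized pass with np.digitize; the two thresholds below are placeholder
values standing in for neg_proposal_2d_iou_hi and pos_proposal_2d_iou_lo.

import numpy as np

def bin_proposals_by_iou(max_ious, neg_iou_hi=0.3, pos_iou_lo=0.5):
    """Returns one bin index per proposal:
    0 = background (IoU == 0), 1 = negative, 2 = middle, 3 = positive."""
    max_ious = np.asarray(max_ious)
    # Right-open bins: [0, neg_iou_hi) -> 1, [neg_iou_hi, pos_iou_lo) -> 2,
    # [pos_iou_lo, 1.0] -> 3 (IoUs above 1.0 are assumed already rejected)
    bins = np.digitize(max_ious, [neg_iou_hi, pos_iou_lo]) + 1
    bins[max_ious == 0.0] = 0  # exact zeros are background proposals
    return bins

print(bin_proposals_by_iou([0.0, 0.1, 0.4, 0.7]))  # -> [0 1 2 3]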
Code example #9
def main():
    """
    Visualization of 3D grid anchor generation, showing 2D projections
        in BEV and image space, and a 3D display of the anchors
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_TRAIN)
    dataset_config.num_clusters[0] = 1
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    label_cluster_utils = LabelClusterUtils(dataset)
    clusters, _ = label_cluster_utils.get_clusters()

    # Options
    img_idx = 1
    # fake_clusters = np.array([[5, 4, 3], [6, 5, 4]])
    # fake_clusters = np.array([[3, 3, 3], [4, 4, 4]])

    fake_clusters = np.array([[4, 2, 3]])
    fake_anchor_stride = [5.0, 5.0]
    ground_plane = [0, -1, 0, 1.72]

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    area_extents = np.array([[-40, 40], [-5, 5], [0, 70]])

    # Generate anchors for cars only
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=dataset.kitti_utils.area_extents,
        anchor_3d_sizes=fake_clusters,
        anchor_stride=fake_anchor_stride,
        ground_plane=ground_plane)
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Project into bev
    bev_boxes, bev_normalized_boxes = \
        anchor_projector.project_to_bev(all_anchors, area_extents[[0, 2]])

    bev_fig, (bev_axes, bev_normalized_axes) = \
        plt.subplots(1, 2, figsize=(16, 7))
    bev_axes.set_xlim(0, 80)
    bev_axes.set_ylim(70, 0)
    bev_normalized_axes.set_xlim(0, 1.0)
    bev_normalized_axes.set_ylim(1, 0.0)

    plt.show(block=False)

    for box in bev_boxes:
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]

        rect = patches.Rectangle((box[0], box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_axes.add_patch(rect)

    for normalized_box in bev_normalized_boxes:
        box_w = normalized_box[2] - normalized_box[0]
        box_h = normalized_box[3] - normalized_box[1]

        rect = patches.Rectangle((normalized_box[0], normalized_box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_normalized_axes.add_patch(rect)

    rgb_fig, rgb_2d_axes, rgb_3d_axes = \
        vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(dataset.sample_names[img_idx])
    image_shape = np.array(Image.open(image_path)).shape

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    start_time = time.time()
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors,
                                                stereo_calib_p2,
                                                image_shape)
    end_time = time.time()
    print("Anchors projected in {} s".format(end_time - start_time))

    # Read the stereo calibration matrix for visualization
    stereo_calib = calib_utils.read_calibration(dataset.calib_dir, img_idx)
    p = stereo_calib.p2

    # Overlay boxes on images
    anchor_objects = []
    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor_box_3d = anchor_boxes_3d[anchor_idx]

        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d)

        # Append to a list for visualization in VTK later
        anchor_objects.append(obj_label)

        # Draw 3D boxes
        vis_utils.draw_box_3d(rgb_3d_axes, obj_label, p)

        # Draw 2D boxes
        rgb_box_2d = rgb_boxes[anchor_idx]

        box_x1 = rgb_box_2d[0]
        box_y1 = rgb_box_2d[1]
        box_w = rgb_box_2d[2] - box_x1
        box_h = rgb_box_2d[3] - box_y1

        rect = patches.Rectangle((box_x1, box_y1),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        rgb_2d_axes.add_patch(rect)

        if anchor_idx % 32 == 0:
            rgb_fig.canvas.draw()

    plt.show(block=False)

    # Create VtkGroundPlane for ground plane visualization
    vtk_ground_plane = VtkGroundPlane()
    vtk_ground_plane.set_plane(ground_plane, area_extents[[0, 2]])

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for boxes
    vtk_boxes = VtkBoxes()
    vtk_boxes.set_objects(anchor_objects, vtk_boxes.COLOUR_SCHEME_KITTI)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vtk.vtkInteractorStyleTrackballCamera())

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()  # Blocking
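
GridAnchor3dGenerator lays anchor centroids out on a stride grid and drops
them onto the ground plane. A minimal sketch of that idea, assuming the
plane is given as [a, b, c, d] with a*x + b*y + c*z + d = 0 (as in the
demo's [0, -1, 0, 1.72]); the real generator also attaches sizes and
rotations:

import numpy as np

def grid_anchor_centroids(area_extents, stride, ground_plane):
    a, b, c, d = ground_plane
    x = np.arange(area_extents[0][0], area_extents[0][1], stride[0])
    z = np.arange(area_extents[2][0], area_extents[2][1], stride[1])
    xx, zz = np.meshgrid(x, z)
    yy = -(a * xx + c * zz + d) / b  # solve the plane equation for y
    return np.stack([xx.ravel(), yy.ravel(), zz.ravel()], axis=1)

centroids = grid_anchor_centroids([[-40, 40], [-5, 5], [0, 70]],
                                  [5.0, 5.0], [0, -1, 0, 1.72])
print(centroids.shape)  # (224, 3); every y is 1.72 for this plane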
Code example #10
def main():
    """Flip RPN Mini Batch
     Visualization of the mini batch anchors for RpnModel training.

     Keys:
         F1: Toggle mini batch anchors
         F2: Flipped
     """

    anchor_colour_scheme = {
        "Car": (255, 0, 0),  # Red
        "Pedestrian": (255, 150, 50),  # Orange
        "Cyclist": (150, 50, 100),  # Purple
        "DontCare": (255, 255, 255),  # White
        "Anchor": (150, 150, 150),  # Gray
        "Regressed Anchor": (255, 255, 0),  # Yellow
        "Positive": (0, 255, 255),  # Teal
        "Negative": (255, 0, 255)  # Purple
    }

    dataset_config_path = mlod.root_dir() + \
        '/configs/mb_rpn_demo_cars.config'

    # dataset_config_path = mlod.root_dir() + \
    #     '/configs/mb_rpn_demo_people.config'

    ##############################
    # Options
    ##############################
    # # # Random sample # # #
    sample_name = None

    # # # Cars # # #
    # sample_name = "000001"
    # sample_name = "000050"
    # sample_name = "000104"
    # sample_name = "000112"
    # sample_name = "000169"
    # sample_name = "000191"

    sample_name = "003801"

    # # # Pedestrians # # #
    # sample_name = "000000"
    # sample_name = "000011"
    # sample_name = "000015"
    # sample_name = "000028"
    # sample_name = "000035"
    # sample_name = "000134"
    # sample_name = "000167"
    # sample_name = '000379'
    # sample_name = '000381'
    # sample_name = '000397'
    # sample_name = '000398'
    # sample_name = '000401'
    # sample_name = '000407'
    # sample_name = '000486'
    # sample_name = '000509'

    # # Cyclists # # #
    # sample_name = '000122'
    # sample_name = '000448'

    # # # Multiple classes # # #
    # sample_name = "000764"
    ##############################
    # End of Options
    ##############################

    # Create Dataset
    dataset = DatasetBuilder.load_dataset_from_config(dataset_config_path)

    # Random sample
    if sample_name is None:
        sample_idx = np.random.randint(0, dataset.num_samples)
        sample_name = dataset.sample_list[sample_idx]

    anchor_strides = dataset.kitti_utils.anchor_strides

    img_idx = int(sample_name)

    print("Showing mini batch for sample {}".format(sample_name))

    image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    image_shape = [image.shape[1], image.shape[0]]

    # KittiUtils class
    dataset_utils = dataset.kitti_utils

    ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)

    point_cloud = obj_utils.get_depth_map_point_cloud(img_idx,
                                                      dataset.calib_dir,
                                                      dataset.depth_dir,
                                                      image_shape)
    points = point_cloud.T

    # Grab ground truth
    ground_truth_list = obj_utils.read_labels(dataset.label_dir, img_idx)
    ground_truth_list = dataset_utils.filter_labels(ground_truth_list)

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    ##############################
    # Flip sample info
    ##############################
    start_time = time.time()

    flipped_image = kitti_aug.flip_image(image)
    flipped_point_cloud = kitti_aug.flip_point_cloud(point_cloud)
    flipped_gt_list = [
        kitti_aug.flip_label_in_3d_only(obj) for obj in ground_truth_list
    ]
    flipped_ground_plane = kitti_aug.flip_ground_plane(ground_plane)
    flipped_calib_p2 = kitti_aug.flip_stereo_calib_p2(stereo_calib_p2,
                                                      image_shape)

    print('flip sample', time.time() - start_time)

    flipped_points = flipped_point_cloud.T
    point_colours = vis_utils.project_img_to_point_cloud(
        points, image, dataset.calib_dir, img_idx)

    ##############################
    # Generate anchors
    ##############################
    clusters, _ = dataset.get_cluster_info()
    anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    # Read mini batch info
    anchors_info = dataset_utils.get_anchors_info(sample_name)

    all_anchor_boxes_3d = []
    all_ious = []
    all_offsets = []
    for class_idx in range(len(dataset.classes)):

        anchor_boxes_3d = anchor_generator.generate(
            area_3d=dataset.kitti_utils.area_extents,
            anchor_3d_sizes=clusters[class_idx],
            anchor_stride=anchor_strides[class_idx],
            ground_plane=ground_plane)

        if len(anchors_info[class_idx]) > 0:
            indices, ious, offsets, classes = anchors_info[class_idx]

            # Get non empty anchors from the indices
            non_empty_anchor_boxes_3d = anchor_boxes_3d[indices]

            all_anchor_boxes_3d.extend(non_empty_anchor_boxes_3d)
            all_ious.extend(ious)
            all_offsets.extend(offsets)

    if len(all_anchor_boxes_3d) == 0:
        # Exit early if anchors_info is empty
        print("No anchors found, please try a different sample")
        return

    # Convert to ndarrays
    all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
    all_ious = np.asarray(all_ious)
    all_offsets = np.asarray(all_offsets)

    ##############################
    # Flip anchors
    ##############################
    start_time = time.time()

    # Flip anchors and offsets
    flipped_anchor_boxes_3d = kitti_aug.flip_boxes_3d(all_anchor_boxes_3d,
                                                      flip_ry=False)
    all_offsets[:, 0] = -all_offsets[:, 0]

    print('flip anchors and offsets', time.time() - start_time)

    # Overwrite with flipped things
    all_anchor_boxes_3d = flipped_anchor_boxes_3d
    points = flipped_points
    ground_truth_list = flipped_gt_list
    ground_plane = flipped_ground_plane

    ##############################
    # Mini batch sampling
    ##############################
    # Sample an RPN mini batch from the non empty anchors
    mini_batch_utils = dataset.kitti_utils.mini_batch_utils
    mb_mask_tf, _ = mini_batch_utils.sample_rpn_mini_batch(all_ious)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    mb_mask = sess.run(mb_mask_tf)

    mb_anchor_boxes_3d = all_anchor_boxes_3d[mb_mask]
    mb_anchor_ious = all_ious[mb_mask]
    mb_anchor_offsets = all_offsets[mb_mask]

    # ObjectLabel list that holds all boxes to visualize
    obj_list = []

    # Convert the mini_batch anchors to object list
    for i in range(len(mb_anchor_boxes_3d)):
        if mb_anchor_ious[i] > mini_batch_utils.rpn_pos_iou_range[0]:
            obj_type = "Positive"
        else:
            obj_type = "Negative"

        obj = box_3d_encoder.box_3d_to_object_label(mb_anchor_boxes_3d[i],
                                                    obj_type)
        obj_list.append(obj)

    # Convert all non-empty anchors to object list
    non_empty_anchor_objs = \
        [box_3d_encoder.box_3d_to_object_label(
            anchor_box_3d, obj_type='Anchor')
         for anchor_box_3d in all_anchor_boxes_3d]

    ##############################
    # Regress Positive Anchors
    ##############################
    # Convert anchor_boxes_3d to anchors and apply offsets
    mb_pos_mask = mb_anchor_ious > mini_batch_utils.rpn_pos_iou_range[0]
    mb_pos_anchor_boxes_3d = mb_anchor_boxes_3d[mb_pos_mask]
    mb_pos_anchor_offsets = mb_anchor_offsets[mb_pos_mask]

    mb_pos_anchors = box_3d_encoder.box_3d_to_anchor(mb_pos_anchor_boxes_3d)
    regressed_pos_anchors = anchor_encoder.offset_to_anchor(
        mb_pos_anchors, mb_pos_anchor_offsets)

    # Convert regressed anchors to ObjectLabels for visualization
    regressed_anchor_boxes_3d = box_3d_encoder.anchors_to_box_3d(
        regressed_pos_anchors, fix_lw=True)
    regressed_anchor_objs = \
        [box_3d_encoder.box_3d_to_object_label(
            box_3d, obj_type='Regressed Anchor')
         for box_3d in regressed_anchor_boxes_3d]

    ##############################
    # Visualization
    ##############################
    cv2.imshow('{} flipped'.format(sample_name), flipped_image)
    cv2.waitKey()

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for mini batch anchors
    vtk_pos_anchor_boxes = VtkBoxes()
    vtk_pos_anchor_boxes.set_objects(obj_list, anchor_colour_scheme)

    # VtkBoxes for non empty anchors
    vtk_non_empty_anchors = VtkBoxes()
    vtk_non_empty_anchors.set_objects(non_empty_anchor_objs,
                                      anchor_colour_scheme)
    vtk_non_empty_anchors.set_line_width(0.1)

    # VtkBoxes for regressed anchors
    vtk_regressed_anchors = VtkBoxes()
    vtk_regressed_anchors.set_objects(regressed_anchor_objs,
                                      anchor_colour_scheme)
    vtk_regressed_anchors.set_line_width(5.0)

    # Create VtkBoxes for ground truth
    vtk_gt_boxes = VtkBoxes()
    vtk_gt_boxes.set_objects(ground_truth_list,
                             anchor_colour_scheme,
                             show_orientations=True)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(points, point_colours)

    vtk_ground_plane = VtkGroundPlane()
    vtk_ground_plane.set_plane(ground_plane, dataset.kitti_utils.bev_extents)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()

    vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_non_empty_anchors.vtk_actor)
    vtk_renderer.AddActor(vtk_pos_anchor_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_regressed_anchors.vtk_actor)
    vtk_renderer.AddActor(vtk_gt_boxes.vtk_actor)
    vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)

    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(160.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("RPN Mini Batch")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vis_utils.ToggleActorsInteractorStyle([
            vtk_non_empty_anchors.vtk_actor,
            vtk_pos_anchor_boxes.vtk_actor,
            vtk_regressed_anchors.vtk_actor,
            vtk_ground_plane.vtk_actor,
        ]))

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
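
kitti_aug.flip_boxes_3d mirrors boxes about the camera's x = 0 plane (the
demo passes flip_ry=False because the offsets are flipped separately). A
minimal sketch of what such a flip typically involves under the KITTI
camera convention, negating x and mirroring the yaw; this is shown only as
an illustration, and the real helper is authoritative:

import numpy as np

def flip_boxes_3d_sketch(boxes_3d, flip_ry=True):
    """boxes_3d: N x [x, y, z, l, w, h, ry]."""
    flipped = np.copy(boxes_3d)
    flipped[:, 0] = -flipped[:, 0]  # mirror the x coordinate
    if flip_ry:
        # Mirroring x reverses the heading's x component: ry -> pi - ry
        flipped[:, 6] = np.pi - flipped[:, 6]
    return flipped

boxes = np.array([[3.0, 1.7, 10.0, 4.0, 1.6, 1.5, 0.5]])
print(flip_boxes_3d_sketch(boxes))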
Code example #11
def main():
    """
    Simple demo script for debugging integral images with visualization
    """
    anchor_colour_scheme = {"Anchor": (0, 0, 255)}  # Blue

    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAIN)

    label_cluster_utils = LabelClusterUtils(dataset)
    clusters, _ = label_cluster_utils.get_clusters()

    area_extents = np.array([[0, 2], [-1, 0.], [0, 2]])
    boxes_3d = np.array([
        [2, 0, 1, 1, 1, 1, 0],
        [1, 0, 2, 1, 1, 1, 0],
    ])

    xyz = np.array([[0.5, -0.01, 1.1],
                    [1.5, -0.01, 1.1],
                    [0.5, -0.01, 1.6],
                    [1.5, -0.01, 1.6],
                    [0.5, -0.49, 1.1],
                    [1.5, -0.49, 1.1],
                    [0.5, -0.51, 1.6],
                    [1.5, -0.51, 1.6]
                    ])

    vx_grid_3d = voxel_grid.VoxelGrid()
    vx_grid_3d.voxelize(xyz, 0.1, area_extents)

    anchors = box_3d_encoder.box_3d_to_anchor(boxes_3d)

    # Filter the boxes here!
    start_time = time.time()
    empty_filter = anchor_filter.get_empty_anchor_filter(anchors=anchors,
                                                         voxel_grid_3d=vx_grid_3d,
                                                         density_threshold=1)
    boxes_3d = boxes_3d[empty_filter]
    end_time = time.time()
    print("Anchors filtered in {} s".format(end_time - start_time))

    box_objects = []
    for box_idx in range(len(boxes_3d)):
        box = boxes_3d[box_idx]
        obj_label = box_3d_encoder.box_3d_to_object_label(box, 'Anchor')

        # Append to a list for visualization in VTK later
        box_objects.append(obj_label)

    # Create VtkAxes
    axes = vtk.vtkAxesActor()
    axes.SetTotalLength(5, 5, 5)

    # Create VtkBoxes for boxes
    vtk_boxes = VtkBoxes()
    vtk_boxes.set_objects(box_objects, anchor_colour_scheme)

    vtk_point_cloud = VtkPointCloud()
    vtk_point_cloud.set_points(xyz)

    vtk_voxel_grid = VtkVoxelGrid()
    vtk_voxel_grid.set_voxels(vx_grid_3d)

    # Create Voxel Grid Renderer in bottom half
    vtk_renderer = vtk.vtkRenderer()
    vtk_renderer.AddActor(vtk_boxes.vtk_actor)
    # vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)
    vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
    vtk_renderer.AddActor(axes)
    vtk_renderer.SetBackground(0.2, 0.3, 0.4)

    # Setup Camera
    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.Pitch(170.0)
    current_cam.Roll(180.0)

    # Zooms out to fit all points on screen
    vtk_renderer.ResetCamera()

    # Zoom in slightly
    current_cam.Zoom(2.5)

    # Reset the clipping range to show all points
    vtk_renderer.ResetCameraClippingRange()

    # Setup Render Window
    vtk_render_window = vtk.vtkRenderWindow()
    vtk_render_window.SetWindowName("Anchors")
    vtk_render_window.SetSize(900, 500)
    vtk_render_window.AddRenderer(vtk_renderer)

    # Setup custom interactor style, which handles mouse and key events
    vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
    vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

    vtk_render_window_interactor.SetInteractorStyle(
        vtk.vtkInteractorStyleTrackballCamera())

    # Render in VTK
    vtk_render_window.Render()
    vtk_render_window_interactor.Start()
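
The anchor filters above rely on integral images: after one cumulative-sum
pass over the voxel occupancy grid, the number of occupied voxels inside
any axis-aligned box costs only a few lookups. A minimal 2D sketch of the
idea (the 3D case adds one more axis and four more lookups):

import numpy as np

occupancy = np.zeros((5, 5), dtype=np.int32)
occupancy[1:3, 1:4] = 1  # a small occupied patch

# Pad with a leading zero row/column so queries need no edge cases
integral = np.zeros((6, 6), dtype=np.int32)
integral[1:, 1:] = occupancy.cumsum(axis=0).cumsum(axis=1)

def box_count(r0, c0, r1, c1):
    """Occupied cells in rows [r0, r1) and cols [c0, c1)."""
    return (integral[r1, c1] - integral[r0, c1]
            - integral[r1, c0] + integral[r0, c0])

print(box_count(0, 0, 5, 5))  # 6: the whole grid
print(box_count(1, 1, 3, 4))  # 6: exactly the occupied patch
print(box_count(3, 0, 5, 5))  # 0: an empty anchor candidate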
Code example #12
    def _calculate_anchors_info(self, all_anchor_boxes_3d, empty_anchor_filter,
                                gt_labels, calib_p2, image_shape):
        """Calculates the list of anchor information in the format:
            N x 14 [index, max_gt_iou, (6 x offsets), class_index, max_gt_img_iou, (4 x image offsets), image_class_index]
                max_gt_out - highest 3D iou with any ground truth box
                offsets - encoded offsets [dx, dy, dz, d_dimx, d_dimy, d_dimz]
                class_index - the anchor's class as an index
                    (e.g. 0 or 1, for "Background" or "Car")
                max_gt_img_iou - highest image iou with any ground truth box
                image_offsets: encoded offsets [dx, dy, d_dimx, d_dimy]
                image_class_index: the anchor's class on image as an index
                    (e.g. 0 or 1, for "Background" or "Car")
                calib_p2: stereo camera calibration p2 matrix
                image_shape: dimensions of the image [h, w]

        Args:
            all_anchor_boxes_3d: list of anchors in box_3d format
                N x [x, y, z, l, w, h, ry]
            empty_anchor_filter: boolean mask of which anchors are non empty
            gt_labels: list of Object Label data format containing ground truth
                labels to generate positives/negatives from.

        Returns:
            list of anchor info
        """
        # Check for ground truth objects
        if len(gt_labels) == 0:
            raise Warning("No valid ground truth label to generate anchors.")

        kitti_utils = self._dataset.kitti_utils

        # Filter empty anchors
        anchor_indices = np.where(empty_anchor_filter)[0]
        anchor_boxes_3d = all_anchor_boxes_3d[empty_anchor_filter]

        # Convert anchor_boxes_3d to anchor format
        anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)

        # Convert gt to boxes_3d -> anchors -> iou format
        gt_boxes_3d = np.asarray([
            box_3d_encoder.object_label_to_box_3d(gt_obj)
            for gt_obj in gt_labels
        ])
        gt_anchors = box_3d_encoder.box_3d_to_anchor(gt_boxes_3d,
                                                     ortho_rotate=True)

        rpn_iou_type = self.mini_batch_utils.rpn_iou_type
        if rpn_iou_type == '2d':
            # Convert anchors to 2d iou format
            anchors_for_2d_iou, _ = np.asarray(
                anchor_projector.project_to_bev(anchors,
                                                kitti_utils.bev_extents))

            gt_boxes_for_2d_iou, _ = anchor_projector.project_to_bev(
                gt_anchors, kitti_utils.bev_extents)

        elif rpn_iou_type == '3d':
            # Convert anchors to 3d iou format for calculation
            anchors_for_3d_iou = box_3d_encoder.box_3d_to_3d_iou_format(
                anchor_boxes_3d)

            gt_boxes_for_3d_iou = \
                box_3d_encoder.box_3d_to_3d_iou_format(gt_boxes_3d)
        else:
            raise ValueError('Invalid rpn_iou_type {}'.format(rpn_iou_type))

        anchors_on_img, _ = np.asarray(
            anchor_projector.project_to_image_space(anchors, calib_p2,
                                                    image_shape))

        gt_img_boxes_for_2d_iou = np.asarray(
            [[gt_obj.x1, gt_obj.y1, gt_obj.x2, gt_obj.y2]
             for gt_obj in gt_labels])

        # Initialize sample and offset lists
        num_anchors = len(anchor_boxes_3d)
        all_info = np.zeros((num_anchors, self.mini_batch_utils.col_length))

        # Update anchor indices
        all_info[:, self.mini_batch_utils.col_anchor_indices] = anchor_indices

        # For each of the labels, generate samples
        for gt_idx in range(len(gt_labels)):

            gt_obj = gt_labels[gt_idx]
            gt_box_3d = gt_boxes_3d[gt_idx]

            # Get 2D or 3D IoU for every anchor
            if self.mini_batch_utils.rpn_iou_type == '2d':
                gt_box_for_2d_iou = gt_boxes_for_2d_iou[gt_idx]

                ious = evaluation.two_d_iou(gt_box_for_2d_iou,
                                            anchors_for_2d_iou)
            elif self.mini_batch_utils.rpn_iou_type == '3d':
                gt_box_for_3d_iou = gt_boxes_for_3d_iou[gt_idx]
                ious = evaluation.three_d_iou(gt_box_for_3d_iou,
                                              anchors_for_3d_iou)

            # Only update indices with a higher iou than before
            update_indices = np.greater(
                ious, all_info[:, self.mini_batch_utils.col_ious])

            # Get ious to update
            ious_to_update = ious[update_indices]

            # Calculate offsets, use 3D iou to get highest iou
            anchors_to_update = anchors[update_indices]
            gt_anchor = box_3d_encoder.box_3d_to_anchor(gt_box_3d,
                                                        ortho_rotate=True)
            offsets = anchor_encoder.anchor_to_offset(anchors_to_update,
                                                      gt_anchor)

            # Convert gt type to index
            class_idx = kitti_utils.class_str_to_index(gt_obj.type)

            # Update anchors info (indices already updated)
            # [index, iou, (offsets), class_index]
            all_info[update_indices,
                     self.mini_batch_utils.col_ious] = ious_to_update

            all_info[update_indices, self.mini_batch_utils.col_offsets_lo:self.
                     mini_batch_utils.col_offsets_hi] = offsets
            all_info[update_indices,
                     self.mini_batch_utils.col_class_idx] = class_idx

            # Image part
            gt_img_box_for_2d_iou = gt_img_boxes_for_2d_iou[gt_idx]
            img_ious = evaluation.two_d_iou(gt_img_box_for_2d_iou,
                                            anchors_on_img)

            # Only update indices with a higher iou than before
            update_img_indices = np.greater(
                img_ious, all_info[:, self.mini_batch_utils.col_img_ious])

            # Get ious to update
            img_ious_to_update = img_ious[update_img_indices]

            # Calculate image offsets
            anchors_on_img_to_update = anchors_on_img[update_img_indices]
            img_offsets = anchor_encoder.img_box_to_offset(
                anchors_on_img_to_update, gt_img_box_for_2d_iou)

            # Update anchors info
            all_info[update_img_indices,
                     self.mini_batch_utils.col_img_ious] = img_ious_to_update

            all_info[update_img_indices,
                     self.mini_batch_utils.col_img_offsets_lo:self.
                     mini_batch_utils.col_img_offsets_hi] = img_offsets
            all_info[update_img_indices,
                     self.mini_batch_utils.col_img_class_idx] = class_idx

        return all_info
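
For reference, a minimal sketch of splitting one anchors_info row into the
fields documented in the docstring above. The column positions follow the
documented ordering; the authoritative indices live in mini_batch_utils
(col_anchor_indices, col_ious, col_offsets_lo, and so on):

import numpy as np

def unpack_anchor_info_row(row):
    """Split one 15-value anchors_info row into named fields."""
    row = np.asarray(row, dtype=np.float64)
    return {
        'anchor_index': int(row[0]),
        'max_gt_iou': float(row[1]),
        'offsets': row[2:8],          # [dx, dy, dz, d_dimx, d_dimy, d_dimz]
        'class_index': int(row[8]),
        'max_gt_img_iou': float(row[9]),
        'image_offsets': row[10:14],  # [dx, dy, d_dimx, d_dimy]
        'image_class_index': int(row[14]),
    }

print(unpack_anchor_info_row(np.arange(15.0)))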
Code example #13
    def preprocess(self, indices):
        """Preprocesses anchor info and saves info to files

        Args:
            indices (int array): sample indices to process.
                If None, processes all samples
        """
        # Get anchor stride for class
        anchor_strides = self._anchor_strides

        dataset = self._dataset
        dataset_utils = self._dataset.kitti_utils
        classes_name = dataset.classes_name

        # Make folder if it doesn't exist yet
        output_dir = self.mini_batch_utils.get_file_path(classes_name,
                                                         anchor_strides,
                                                         sample_name=None)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Get clusters for class
        all_clusters_sizes, _ = dataset.get_cluster_info()

        anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

        # Load indices of data_split
        all_samples = dataset.sample_list

        if indices is None:
            indices = np.arange(len(all_samples))
        num_samples = len(indices)

        # For each image in the dataset, save info on the anchors
        for sample_idx in indices:
            # Get image name for given cluster
            sample_name = all_samples[sample_idx].name
            img_idx = int(sample_name)

            # Check for existing files and skip to the next
            if self._check_for_existing(classes_name, anchor_strides,
                                        sample_name):
                print("{} / {}: Sample already preprocessed".format(
                    sample_idx + 1, num_samples, sample_name))
                continue

            # Get ground truth and filter based on difficulty
            ground_truth_list = obj_utils.read_labels(dataset.label_dir,
                                                      img_idx)

            # Get calibration matrix
            calib_p2 = calib_utils.read_calibration(self.calib_dir,
                                                    int(sample_name)).p2

            # Filter objects to dataset classes
            filtered_gt_list = dataset_utils.filter_labels(ground_truth_list)
            filtered_gt_list = np.asarray(filtered_gt_list)

            # If filtering by class leaves no valid ground truth, skip this image
            if len(filtered_gt_list) == 0:
                print("{} / {} No {}s for sample {} "
                      "(Ground Truth Filter)".format(sample_idx + 1,
                                                     num_samples, classes_name,
                                                     sample_name))

                # Output an empty file and move on to the next image.
                self._save_to_file(classes_name, anchor_strides, sample_name)
                continue

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(img_idx,
                                                    dataset.planes_dir)

            image = Image.open(dataset.get_rgb_image_path(sample_name))
            image_shape = [image.size[1], image.size[0]]

            # Generate sliced 2D voxel grid for filtering
            vx_grid_2d = dataset_utils.create_sliced_voxel_grid_2d(
                sample_name,
                source=dataset.bev_source,
                image_shape=image_shape)

            # List for merging all anchors
            all_anchor_boxes_3d = []

            # Create anchors for each class
            for class_idx in range(len(dataset.classes)):
                # Generate anchors for all classes
                grid_anchor_boxes_3d = anchor_generator.generate(
                    area_3d=self._area_extents,
                    anchor_3d_sizes=all_clusters_sizes[class_idx],
                    anchor_stride=self._anchor_strides[class_idx],
                    ground_plane=ground_plane)

                all_anchor_boxes_3d.extend(grid_anchor_boxes_3d)

            # Filter empty anchors
            all_anchor_boxes_3d = np.asarray(all_anchor_boxes_3d)
            anchors = box_3d_encoder.box_3d_to_anchor(all_anchor_boxes_3d)
            empty_anchor_filter = anchor_filter.get_empty_anchor_filter_2d(
                anchors, vx_grid_2d, self._density_threshold)

            # Calculate anchor info
            anchors_info = self._calculate_anchors_info(
                all_anchor_boxes_3d, empty_anchor_filter, filtered_gt_list,
                calib_p2, image_shape)

            anchor_ious = anchors_info[:, self.mini_batch_utils.col_ious]

            valid_iou_indices = np.where(anchor_ious > 0.0)[0]

            print("{} / {}:"
                  "{:>6} anchors, "
                  "{:>6} iou > 0.0, "
                  "for {:>3} {}(s) for sample {}".format(
                      sample_idx + 1, num_samples, len(anchors_info),
                      len(valid_iou_indices), len(filtered_gt_list),
                      classes_name, sample_name))

            # Save anchors info
            self._save_to_file(classes_name, anchor_strides, sample_name,
                               anchors_info)
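
The offsets saved above come from anchor_encoder.anchor_to_offset. Purely
as an illustration, and as an assumption about the encoding convention
rather than a copy of the implementation, one common axis-aligned scheme
normalizes centroid deltas by the anchor dimensions and takes log ratios
of the sizes:

import numpy as np

def anchor_to_offset_sketch(anchors, gt_anchor):
    """anchors: N x [x, y, z, dim_x, dim_y, dim_z]; gt_anchor: 6 values."""
    anchors = np.asarray(anchors, dtype=np.float64)
    gt = np.asarray(gt_anchor, dtype=np.float64)
    t_xyz = (gt[:3] - anchors[:, :3]) / anchors[:, 3:]  # normalized deltas
    t_dim = np.log(gt[3:] / anchors[:, 3:])             # log size ratios
    return np.hstack([t_xyz, t_dim])

anchors = np.array([[0.0, 1.7, 10.0, 4.0, 1.6, 1.5]])
gt = np.array([0.5, 1.7, 11.0, 3.9, 1.6, 1.5])
print(anchor_to_offset_sketch(anchors, gt))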