Example #1
def get_point_colours(points, cam_p, image):
    points_in_im = calib_utils.project_pc_to_image(points.T, cam_p)
    points_in_im_rounded = np.round(points_in_im).astype(np.int32)

    point_colours = image[points_in_im_rounded[1], points_in_im_rounded[0]]

    return point_colours
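For reference, a minimal self-contained sketch of the same colour lookup, assuming project_pc_to_image performs a standard pinhole projection (homogeneous multiply by the 3x4 camera matrix, then division by depth). The camera matrix, image, and points below are made up for illustration:

import numpy as np

def project_pc_to_image_sketch(points, cam_p):
    # (3, N) camera-frame points -> (2, N) pixel coordinates (assumed pinhole model)
    pts_h = np.vstack([points, np.ones((1, points.shape[1]))])
    uvw = cam_p @ pts_h
    return uvw[0:2] / uvw[2]

cam_p = np.array([[700.0, 0.0, 600.0, 0.0],
                  [0.0, 700.0, 180.0, 0.0],
                  [0.0, 0.0, 1.0, 0.0]])
image = np.random.randint(0, 255, (375, 1242, 3), dtype=np.uint8)

points = np.array([[1.0, -2.0], [0.5, 0.2], [10.0, 15.0]])  # (3, N) in camera frame
points_in_im_rounded = np.round(project_pc_to_image_sketch(points, cam_p)).astype(np.int32)

# Note the (row, col) = (v, u) ordering when indexing the image
point_colours = image[points_in_im_rounded[1], points_in_im_rounded[0]]
print(point_colours.shape)  # (2, 3)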
Example #2
def compute_orientation_3d(obj, p):
    """Computes the orientation given object and camera matrix

    Keyword arguments:
    obj -- object file to draw bounding box
    p -- transform matrix
    """

    # compute rotational matrix
    rot = np.array([[+np.cos(obj.ry), 0, +np.sin(obj.ry)],
                    [0, 1, 0],
                    [-np.sin(obj.ry), 0, +np.cos(obj.ry)]])

    orientation3d = np.array([0.0, obj.l, 0.0, 0.0, 0.0, 0.0]).reshape(3, 2)
    orientation3d = np.dot(rot, orientation3d)

    orientation3d[0, :] = orientation3d[0, :] + obj.t[0]
    orientation3d[1, :] = orientation3d[1, :] + obj.t[1]
    orientation3d[2, :] = orientation3d[2, :] + obj.t[2]

    # only draw for boxes that are in front of the camera
    for idx in np.arange(orientation3d.shape[1]):
        if orientation3d[2, idx] < 0.1:
            return None

    return calib_utils.project_pc_to_image(orientation3d, p)
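A small self-contained sketch of the same geometry: the six-element vector reshaped to (3, 2) is simply the two endpoints (the origin and (l, 0, 0)) of the heading line in the object frame, which are then rotated about the camera y-axis and shifted to the centroid. The numeric values are made up for illustration:

import numpy as np

ry, l, t = np.deg2rad(30.0), 4.0, np.array([2.0, 1.5, 20.0])  # heading, length, centroid

# Rotation about the camera y-axis (same form as in the function above)
rot = np.array([[np.cos(ry), 0, np.sin(ry)],
                [0, 1, 0],
                [-np.sin(ry), 0, np.cos(ry)]])

# Heading line endpoints in the object frame: (0, 0, 0) and (l, 0, 0)
orientation3d = np.array([[0.0, l],
                          [0.0, 0.0],
                          [0.0, 0.0]])

# Rotate into the camera frame and translate to the object centroid
orientation3d = rot @ orientation3d + t.reshape(3, 1)
print(orientation3d)  # columns are the 3D endpoints of the orientation arrow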
Example #3
def postprocess_cen_x(pred_box_2d, pred_box_3d, cam_p):
    """Post-process centroid x by projecting predicted 3D box and finding width ratio to centre,
    and then finding the u position by using this ratio on the 2D box and projecting it.

    Args:
        pred_box_2d: 2D box in box_2d format [y1, x1, y2, x2]
        pred_box_3d: 3D box in box_3d format [x, y, z, l, w, h, ry]
        cam_p: camera projection matrix

    Returns:
        new_cen_x: post-processed centroid x position
    """

    focal_length = cam_p[0, 0]
    centre_u = cam_p[0, 2]

    # Project corners
    pred_box_3d_corners = obj_utils.compute_box_3d_corners(pred_box_3d)
    pred_box_corners_uv = calib_utils.project_pc_to_image(
        pred_box_3d_corners, cam_p)

    # Project centroid
    pred_cen_pc = pred_box_3d[0:3, np.newaxis]
    pred_cen_uv = calib_utils.project_pc_to_image(pred_cen_pc, cam_p)

    # Find min and max u of the projected corners
    pred_box_min_u = np.amin(pred_box_corners_uv[0])
    pred_box_max_u = np.amax(pred_box_corners_uv[0])

    # Find centroid u ratio
    pred_box_w = pred_box_max_u - pred_box_min_u
    pred_box_cen_u_ratio = (pred_cen_uv[0] - pred_box_min_u) / pred_box_w

    # Find new u from original 2D detection
    pred_box_w = pred_box_2d[3] - pred_box_2d[1]
    pred_box_u = pred_box_2d[1] + pred_box_cen_u_ratio * pred_box_w

    # Calculate new centroid x
    i = pred_box_u - centre_u

    # Similar triangles ratio (x/i = d/f)
    pred_cen_z = pred_box_3d[2]
    ratio = pred_cen_z / focal_length
    new_cen_x = i * ratio

    return new_cen_x
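The final step relies only on the pinhole similar-triangles relation u - c_u = f * x / z, so x is recovered from the adjusted u as x = (u - c_u) * z / f. A minimal numeric check with made-up intrinsics:

import numpy as np

f, c_u = 700.0, 600.0            # illustrative focal length and principal point
x, z = 1.5, 20.0                 # a known centroid x and depth

u = c_u + f * x / z              # forward projection of the centroid
new_cen_x = (u - c_u) * (z / f)  # back-projection, as in the function above
print(np.isclose(new_cen_x, x))  # True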
Example #4
def proj_points(xz_dist,
                centroid_y,
                viewing_angle,
                cam2_inst_points_local,
                cam_p,
                rotate_view=True):
    """Projects point based on estimated transformation matrix
    calculated from xz_dist and viewing angle

    Args:
        xz_dist: distance along viewing angle
        centroid_y: box centroid y
        viewing_angle: viewing angle
        cam2_inst_points_local: (N, 3) instance points
        cam_p: (3, 4) camera projection matrix
        rotate_view: bool whether to rotate by viewing angle

    Returns:
        points_uv: (2, N) The points projected in u, v coordinates
        valid_points_mask: (N) Mask of valid points
    """

    guess_x = xz_dist * np.sin(viewing_angle)
    guess_y = centroid_y
    guess_z = xz_dist * np.cos(viewing_angle)

    # Rotate predicted instance points to viewing angle and translate to guessed centroid
    rot_mat = transform_utils.np_get_tr_mat(viewing_angle, (0.0, 0.0, 0.0))
    t_mat = transform_utils.np_get_tr_mat(0.0, [guess_x, guess_y, guess_z])
    if rotate_view:
        cam2_points_rotated = transform_utils.apply_tr_mat_to_points(
            rot_mat, cam2_inst_points_local)
    else:
        cam2_points_rotated = cam2_inst_points_local

    cam2_points_global = transform_utils.apply_tr_mat_to_points(
        t_mat, cam2_points_rotated)

    # Get valid points mask
    valid_points_mask = np.sum(np.abs(cam2_points_rotated), axis=1) > 0.1

    # Shift points into cam0 frame for projection
    # Get x offset (b_cam) from calibration: cam_mat[0, 3] = (-f_x * b_cam)
    x_offset = -cam_p[0, 3] / cam_p[0, 0]

    # Shift points from cam2 to cam0 frame
    cam0_points_global = (cam2_points_global +
                          [x_offset, 0, 0]) * valid_points_mask.reshape(-1, 1)

    # Project back to image
    pred_points_in_img = calib_utils.project_pc_to_image(
        cam0_points_global.T, cam_p) * valid_points_mask

    return pred_points_in_img, valid_points_mask
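A short sketch of the stereo offset used above: for a rectified KITTI-style projection matrix, cam_p[0, 3] = -f_x * b_cam, so dividing by -f_x recovers the baseline offset between this camera and the reference (cam0) frame. The focal length and baseline below are illustrative:

import numpy as np

f_x, b_cam = 700.0, 0.06  # illustrative focal length and baseline (m)
cam_p = np.array([[f_x, 0.0, 600.0, -f_x * b_cam],
                  [0.0, f_x, 180.0, 0.0],
                  [0.0, 0.0, 1.0, 0.0]])

# Recover the x offset exactly as in proj_points()
x_offset = -cam_p[0, 3] / cam_p[0, 0]
print(np.isclose(x_offset, b_cam))  # True

# Shifting cam2-frame points by +x_offset expresses them in the cam0 frame for projection
cam2_points = np.array([[1.0, 0.5, 10.0]])
cam0_points = cam2_points + [x_offset, 0, 0]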
Example #5
def get_lidar_point_cloud_for_cam(sample_name, frame_calib, velo_dir,
                                  image_shape=None, cam_idx=2):
    """Gets the lidar point cloud in cam0 frame, and optionally returns only the
    points that are projected to an image.

    Args:
        sample_name: sample name
        frame_calib: FrameCalib frame calibration
        velo_dir: velodyne directory
        image_shape: (optional) image shape [h, w] to filter points inside image
        cam_idx: (optional) cam index (2 or 3) for filtering

    Returns:
        (3, N) point_cloud in the form [[x,...][y,...][z,...]]
    """

    # Get points in camera frame
    point_cloud = get_lidar_point_cloud(sample_name, frame_calib, velo_dir)

    # Only keep points in front of the camera (z > 1.0)
    point_cloud = point_cloud[:, point_cloud[2] > 1.0]

    if image_shape is None:
        return point_cloud

    else:

        # Project to image frame
        if cam_idx == 2:
            cam_p = frame_calib.p2
        elif cam_idx == 3:
            cam_p = frame_calib.p3
        else:
            raise ValueError('Invalid cam_idx', cam_idx)

        # Project to image
        points_in_img = calib_utils.project_pc_to_image(point_cloud, cam_p=cam_p)
        points_in_img_rounded = np.round(points_in_img)

        # Filter based on the given image shape
        image_filter = (points_in_img_rounded[0] >= 0) & \
                       (points_in_img_rounded[0] < image_shape[1]) & \
                       (points_in_img_rounded[1] >= 0) & \
                       (points_in_img_rounded[1] < image_shape[0])

        filtered_point_cloud = point_cloud[:, image_filter].astype(np.float32)

        return filtered_point_cloud
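A self-contained sketch of the image-bounds filter used above, with a made-up pinhole projection standing in for calib_utils.project_pc_to_image:

import numpy as np

def project(points, cam_p):
    # (3, N) camera-frame points -> (2, N) pixel coordinates (assumed pinhole model)
    pts_h = np.vstack([points, np.ones((1, points.shape[1]))])
    uvw = cam_p @ pts_h
    return uvw[:2] / uvw[2]

cam_p = np.array([[700., 0., 600., 0.],
                  [0., 700., 180., 0.],
                  [0., 0., 1., 0.]])
image_shape = [375, 1242]  # [h, w]

point_cloud = np.random.uniform([-30, -2, 2], [30, 2, 60], (1000, 3)).T  # (3, N)

points_in_img = np.round(project(point_cloud, cam_p))
image_filter = (points_in_img[0] >= 0) & (points_in_img[0] < image_shape[1]) & \
               (points_in_img[1] >= 0) & (points_in_img[1] < image_shape[0])
filtered_point_cloud = point_cloud[:, image_filter]
print(filtered_point_cloud.shape)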
Example #6
def get_viewing_angle_box_3d(box_3d, cam_p=None, version='x_offset'):
    """Calculates the viewing angle to the centroid of a box_3d

    Args:
        box_3d: box_3d in cam_0 frame
        cam_p: camera projection matrix, required if version is not 'cam_0'
        version:
            'cam_0': assuming cam_0 frame
            'x_offset': apply x_offset from camera baseline to cam_0
            'projection': project centroid to image

    Returns:
        viewing_angle: viewing angle to box centroid
    """
    format_checker.check_box_3d_format(box_3d)

    if version == 'cam_0':
        # Viewing angle in cam_0 frame
        viewing_angle = np.arctan2(box_3d[0], box_3d[2])

    elif version == 'x_offset':
        # Get x offset (b_cam) from calibration: cam_mat[0, 3] = (-f_x * b_cam)
        x_offset = -cam_p[0, 3] / cam_p[0, 0]

        # Shift box_3d from cam_0 to cam_N frame
        box_x_cam = box_3d[0] - x_offset

        # Calculate viewing angle
        viewing_angle = np.arctan2(box_x_cam, box_3d[2])

    elif version == 'projection':
        # Project centroid to the image
        proj_uv = calib_utils.project_pc_to_image(box_3d[0:3].reshape(3, -1), cam_p)

        centre_u = cam_p[0, 2]
        focal_length = cam_p[0, 0]

        centre_x = proj_uv[0][0]

        # Assume depth of 1.0 to calculate viewing angle
        # viewing_angle = atan2(i / f, 1.0)
        viewing_angle = np.arctan2((centre_x - centre_u) / focal_length, 1.0)

    else:
        raise ValueError('Invalid version', version)

    return viewing_angle
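A quick numeric illustration of the 'cam_0' and 'x_offset' versions with made-up values: the viewing angle is simply the arctangent of lateral offset over depth, evaluated either directly in the cam_0 frame or after shifting the centroid into the frame of the camera that produced the detection:

import numpy as np

box_3d = np.array([2.0, 1.5, 20.0, 4.0, 1.6, 1.5, 0.0])  # [x, y, z, l, w, h, ry], illustrative

# 'cam_0': angle straight from the cam_0 centroid
viewing_angle_cam_0 = np.arctan2(box_3d[0], box_3d[2])

# 'x_offset': shift the centroid into the detecting camera's frame first
f_x, b_cam = 700.0, 0.06
cam_p = np.array([[f_x, 0., 600., -f_x * b_cam],
                  [0., f_x, 180., 0.],
                  [0., 0., 1., 0.]])
x_offset = -cam_p[0, 3] / cam_p[0, 0]
viewing_angle_x_offset = np.arctan2(box_3d[0] - x_offset, box_3d[2])

print(viewing_angle_cam_0, viewing_angle_x_offset)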
Example #7
def project_depths(point_cloud, cam_p, image_shape, max_depth=100.0):
    """Projects a point cloud into image space and saves depths per pixel.

    Args:
        point_cloud: (3, N) Point cloud in cam0
        cam_p: camera projection matrix
        image_shape: image shape [h, w]
        max_depth: optional, max depth for inversion

    Returns:
        projected_depths: projected depth map
    """

    # Work with points as (N, 3) so a depth can be stored per point
    all_points = point_cloud.T

    # Project each point into the image
    points_in_img = calib_utils.project_pc_to_image(all_points.T, cam_p)
    points_in_img_int = np.int32(np.round(points_in_img))

    # Remove points outside image
    valid_indices = \
        (points_in_img_int[0] >= 0) & (points_in_img_int[0] < image_shape[1]) & \
        (points_in_img_int[1] >= 0) & (points_in_img_int[1] < image_shape[0])

    all_points = all_points[valid_indices]
    points_in_img_int = points_in_img_int[:, valid_indices]

    # Invert depths
    all_points[:, 2] = max_depth - all_points[:, 2]

    # Only save valid pixels, keep closer points when overlapping
    projected_depths = np.zeros(image_shape)
    valid_indices = [points_in_img_int[1], points_in_img_int[0]]
    projected_depths[valid_indices] = [
        max(
            projected_depths[points_in_img_int[1, idx],
                             points_in_img_int[0, idx]], all_points[idx, 2])
        for idx in range(points_in_img_int.shape[1])
    ]

    projected_depths[valid_indices] = \
        max_depth - projected_depths[valid_indices]

    return projected_depths.astype(np.float32)
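The max_depth inversion above makes larger stored values correspond to closer points. Purely as an alternative sketch (not the function's approach), scattering the raw depths with an unbuffered minimum keeps the closest point per pixel directly, even when several points land on the same pixel:

import numpy as np

def depth_map_sketch(points_in_img_int, depths, image_shape):
    # Scatter depths into an image, keeping the closest point for overlapping pixels
    depth_map = np.full(image_shape, np.inf)
    # np.minimum.at handles duplicate pixel indices correctly (unbuffered minimum)
    np.minimum.at(depth_map, (points_in_img_int[1], points_in_img_int[0]), depths)
    depth_map[np.isinf(depth_map)] = 0.0  # pixels with no points
    return depth_map.astype(np.float32)

# Toy example: two points land on the same pixel, the closer one (depth 5.0) wins
pix = np.array([[10, 10],     # u coordinates
                [20, 20]])    # v coordinates
depths = np.array([5.0, 8.0])
print(depth_map_sketch(pix, depths, (375, 1242))[20, 10])  # 5.0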
Example #8
def project_corners_3d_to_image(corners_3d, p):
    """Computes the 3D bounding box projected onto
    image space.

    Keyword arguments:
    obj -- object file to draw bounding box
    p -- transform matrix

    Returns:
        corners : numpy array of corner points projected
        onto image space.
        face_idx: numpy array of 3D bounding box face
    """
    # Indices of the 3D bounding box faces, reshaped to a 4x4 matrix (one row per face)
    face_idx = np.array([0, 1, 5, 4,  # front face
                         1, 2, 6, 5,  # right face
                         2, 3, 7, 6,  # back face
                         3, 0, 4, 7]).reshape((4, 4))  # left face
    return calib_utils.project_pc_to_image(corners_3d, p), face_idx
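A short sketch of how the returned face_idx is typically consumed: each of its rows indexes four of the eight projected corners, giving the 2D polygon for one face of the box. The projected corners here are made-up values:

import numpy as np

corners_2d = np.random.uniform(0, 100, (2, 8))  # pretend (2, 8) projected corners
face_idx = np.array([0, 1, 5, 4,
                     1, 2, 6, 5,
                     2, 3, 7, 6,
                     3, 0, 4, 7]).reshape((4, 4))

for face in face_idx:
    polygon = corners_2d[:, face]  # (2, 4) image points of one box face
    print(polygon.shape)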
Example #9
    def test_tf_project_pc_to_image(self):
        """Check that tf_project_pc_to_image matches numpy version"""

        dataset = DatasetBuilder.build_kitti_dataset(
            DatasetBuilder.KITTI_TRAINVAL)

        np.random.seed(12345)
        point_cloud_batch = np.random.rand(32, 3, 2304)
        frame_calib = calib_utils.get_frame_calib(dataset.calib_dir, '000050')
        cam_p = frame_calib.p2

        exp_proj_uv = [
            calib_utils.project_pc_to_image(point_cloud, cam_p)
            for point_cloud in point_cloud_batch
        ]

        tf_proj_uv = calib_utils.tf_project_pc_to_image(
            point_cloud_batch, cam_p, 32)

        with self.test_session() as sess:
            proj_uv_out = sess.run(tf_proj_uv)

        np.testing.assert_allclose(exp_proj_uv, proj_uv_out)
Example #10
def main():

    ##############################
    # Options
    ##############################

    point_cloud_source = 'depth_2_multiscale'

    samples_to_use = None  # all samples

    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    out_instance_dir = 'outputs/instance_2_{}'.format(point_cloud_source)

    required_classes = [
        'Car',
        'Pedestrian',
        'Cyclist',
        'Van',
        'Truck',
        'Person_sitting',
        'Tram',
        'Misc',
    ]

    ##############################
    # End of Options
    ##############################

    # Create instance folder
    os.makedirs(out_instance_dir, exist_ok=True)

    # Get frame ids to process
    if samples_to_use is None:
        samples_to_use = dataset.get_sample_names()

    # Begin instance mask generation
    for sample_idx, sample_name in enumerate(samples_to_use):

        sys.stdout.write(
            '\r{} / {} Generating {} instances for sample {}'.format(
                sample_idx, dataset.num_samples - 1, point_cloud_source,
                sample_name))

        # Get image
        image = obj_utils.get_image(sample_name, dataset.image_2_dir)
        image_shape = image.shape[0:2]

        # Get calibration
        frame_calib = calib_utils.get_frame_calib(dataset.calib_dir,
                                                  sample_name)

        # Get point cloud
        if point_cloud_source.startswith('depth'):
            point_cloud = obj_utils.get_depth_map_point_cloud(
                sample_name, frame_calib, dataset.depth_dir)

        elif point_cloud_source == 'velo':
            point_cloud = obj_utils.get_lidar_point_cloud_for_cam(
                sample_name, frame_calib, dataset.velo_dir, image_shape)
        else:
            raise ValueError('Invalid point cloud source', point_cloud_source)

        # Filter according to classes
        obj_labels = obj_utils.read_labels(dataset.kitti_label_dir,
                                           sample_name)
        obj_labels, _ = obj_utils.filter_labels_by_class(
            obj_labels, required_classes)

        # Get 2D and 3D bounding boxes from labels
        gt_boxes_2d = [
            box_3d_encoder.object_label_to_box_2d(obj_label)
            for obj_label in obj_labels
        ]
        gt_boxes_3d = [
            box_3d_encoder.object_label_to_box_3d(obj_label)
            for obj_label in obj_labels
        ]

        instance_image = np.full(image_shape, 255, dtype=np.uint8)

        # Start instance index at 0 and generate instance masks for all boxes
        inst_idx = 0
        for obj_label, box_2d, box_3d in zip(obj_labels, gt_boxes_2d,
                                             gt_boxes_3d):

            # Apply inflation and offset to box_3d
            modified_box_3d = modify_box_3d(box_3d, obj_label)

            # Get points in 3D box
            box_points, mask = obj_utils.points_in_box_3d(
                modified_box_3d, point_cloud.T)

            # Get points in 2D box
            points_in_im = calib_utils.project_pc_to_image(
                box_points.T, cam_p=frame_calib.p2)
            mask_2d = \
                (points_in_im[0] >= box_2d[1]) & \
                (points_in_im[0] <= box_2d[3]) & \
                (points_in_im[1] >= box_2d[0]) & \
                (points_in_im[1] <= box_2d[2])

            if point_cloud_source.startswith('depth'):
                mask_points_in_im = np.where(mask.reshape(image_shape))
                mask_points_in_im = [
                    mask_points_in_im[0][mask_2d],
                    mask_points_in_im[1][mask_2d]
                ]
                instance_pixels = np.asarray(
                    [mask_points_in_im[1], mask_points_in_im[0]])
            elif point_cloud_source == 'velo':
                # image_points = box_utils.project_to_image(
                #     box_points.T, frame.p_left).astype(np.int32)
                pass

            # Guarantees that indices don't exceed image dimensions
            instance_pixels[0, :] = np.clip(instance_pixels[0, :], 0,
                                            image_shape[1] - 1)
            instance_pixels[1, :] = np.clip(instance_pixels[1, :], 0,
                                            image_shape[0] - 1)

            instance_image[instance_pixels[1, :],
                           instance_pixels[0, :]] = np.uint8(inst_idx)

            inst_idx += 1

        # Write image to directory
        cv2.imwrite(out_instance_dir + '/{}.png'.format(sample_name),
                    instance_image, [cv2.IMWRITE_PNG_COMPRESSION, 1])
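One detail worth noting in the loop above is the box_2d ordering [y1, x1, y2, x2]: the projected u coordinates are compared against indices 1 and 3, and the v coordinates against indices 0 and 2. A standalone sketch of that filter with made-up points:

import numpy as np

box_2d = np.array([100.0, 200.0, 300.0, 400.0])  # [y1, x1, y2, x2], illustrative
points_in_im = np.array([[250.0, 450.0, 210.0],   # u
                         [150.0, 150.0, 350.0]])  # v

mask_2d = (points_in_im[0] >= box_2d[1]) & (points_in_im[0] <= box_2d[3]) & \
          (points_in_im[1] >= box_2d[0]) & (points_in_im[1] <= box_2d[2])
print(mask_2d)  # [ True False False]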
Example #11
def project_to_image_space(box_3d,
                           calib_p2,
                           truncate=False,
                           image_size=None,
                           discard=True,
                           discard_before_truncation=True):
    """ Projects a box_3d into image space

    Args:
        box_3d: single box_3d to project
        calib_p2: stereo calibration p2 matrix
        truncate: if True, 2D projections are truncated to be inside the image
        image_size: [w, h] must be provided if truncate is True,
            used for truncation
        discard: if True, discard boxes that are truncated over a certain amount
        discard_before_truncation: If True, discard boxes that are larger than
            80% of the image in width OR height BEFORE truncation. If False,
            discard boxes that are larger than 80% of the width AND
            height AFTER truncation.

    Returns:
        Projected box in image space [x1, y1, x2, y2]
            Returns None if box is not inside the image
    """

    format_checker.check_box_3d_format(box_3d)

    obj_label = box_3d_encoder.box_3d_to_object_label(box_3d)
    corners_3d = obj_utils.compute_obj_label_corners_3d(obj_label)

    projected = calib_utils.project_pc_to_image(corners_3d, calib_p2)

    x1 = np.amin(projected[0])
    y1 = np.amin(projected[1])
    x2 = np.amax(projected[0])
    y2 = np.amax(projected[1])

    img_box = np.array([x1, y1, x2, y2])

    if truncate:
        if not image_size:
            raise ValueError('Image size must be provided')

        image_w = image_size[0]
        image_h = image_size[1]

        # Discard invalid boxes (outside image space)
        if img_box[0] > image_w or \
                img_box[1] > image_h or \
                img_box[2] < 0 or \
                img_box[3] < 0:
            return None

        # Discard boxes that are larger than 80% of the image width OR height
        if discard and discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) or img_box_h > (image_h * 0.8):
                return None

        # Truncate remaining boxes into image space
        if img_box[0] < 0:
            img_box[0] = 0
        if img_box[1] < 0:
            img_box[1] = 0
        if img_box[2] > image_w:
            img_box[2] = image_w
        if img_box[3] > image_h:
            img_box[3] = image_h

        # Discard boxes that cover more than 80% of the image width AND height after truncation
        if discard and not discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) and img_box_h > (image_h * 0.8):
                return None

    return img_box
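The truncation step above is equivalent to clipping the box corners to the image extents; a compact sketch (image size values are illustrative):

import numpy as np

img_box = np.array([-15.0, 20.0, 1300.0, 360.0])  # [x1, y1, x2, y2]
image_w, image_h = 1242, 375

truncated = np.clip(img_box, [0, 0, 0, 0], [image_w, image_h, image_w, image_h])
print(truncated)  # [   0.   20. 1242.  360.]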