Example #1 (score: 0)
def ExtractNpContent(np_dict, calib):
    """Parse saved np arrays and convert 3D bboxes to camera0 coordinates.

  Args:
    np_dict: a dict of numpy arrays with keys 'bboxes', 'scores',
      'class_ids' and 'bboxes_2d'.
    calib: a parsed calibration dictionary.

  Returns:
    A tuple of 6 ndarrays:

    - location_camera: [N, 3]. [x, y, z] in camera0 coordinate.
    - dimension_camera: [N, 3]. The [height, width, length] of objects.
    - phi_camera: [N, 1]. Rotation around y-axis in camera0 coordinate.
    - bboxes_2d: [N, 4]. The corresponding 2D bboxes in the image coordinate.
    - scores: [N]. Confidence scores for each box for the assigned class.
    - class_ids: [N]. The class id assigned to each box.
  """
    bboxes = np_dict["bboxes"]
    num_boxes = len(bboxes)

    # Convert every 3D box from velodyne coordinates to camera0 coordinates.
    transform = kitti_data.VeloToCameraTransformation(calib)
    location_cam = np.zeros((num_boxes, 3))
    dimension_cam = np.zeros((num_boxes, 3))
    rotation_cam = np.zeros((num_boxes, 1))
    for row, bbox in enumerate(bboxes):
        converted = kitti_data.BBox3DToKITTIObject(bbox, transform)
        location_cam[row, :], dimension_cam[row, :], rotation_cam[row, :] = (
            converted)

    return (location_cam, dimension_cam, rotation_cam, np_dict["bboxes_2d"],
            np_dict["scores"], np_dict["class_ids"])
Example #2 (score: 0)
 def testKITTIObjToBBoxAndInverse(self):
     """Checks that KITTI object -> BBox3D -> KITTI object round-trips."""
     objects = kitti_data.LoadLabelFile(self._label_file)
     calib = kitti_data.LoadCalibrationFile(self._calib_file)
     # Both rigid transforms depend only on the calibration, so hoist them.
     cam_to_velo = kitti_data.CameraToVeloTransformation(calib)
     velo_to_cam = kitti_data.VeloToCameraTransformation(calib)
     for obj in objects:
         bbox3d = kitti_data._KITTIObjectToBBox3D(obj, cam_to_velo)
         location, dimensions, rotation_y = kitti_data.BBox3DToKITTIObject(
             bbox3d, velo_to_cam)
         # The round trip must reproduce the original label fields.
         self.assertAllClose(obj['location'], location)
         self.assertAllClose(obj['dimensions'], dimensions)
         self.assertAllClose(obj['rotation_y'], rotation_y)
Example #3 (score: 0)
 def testVeloToCamAndCamToVeloAreInverses(self):
     """Verifies the two calibration transforms compose to the 4x4 identity."""
     calib = kitti_data.LoadCalibrationFile(self._calib_file)
     product = kitti_data.CameraToVeloTransformation(calib).dot(
         kitti_data.VeloToCameraTransformation(calib))
     self.assertAllClose(product, np.eye(4))
Example #4 (score: 0)
def _ReadObjectDataset(root_dir, frame_names):
    """Reads and parses KITTI dataset files into a list of TFExample protos.

    Args:
      root_dir: Path of the KITTI split directory containing the 'image_2',
        'calib', 'velodyne' and (for training data) 'label_2' subdirectories.
      frame_names: List of frame names (file basenames without extension)
        to read.

    Returns:
      A list of tf.train.Example protos, one per frame, holding the encoded
      image, the velodyne point cloud, per-object annotations (when a label
      file exists) and the calibration transform matrices.

    Raises:
      ValueError: If a decoded image is not a 3-channel (H, W, 3) array.
    """
    examples = []

    total_frames = len(frame_names)
    for frame_index, frame_name in enumerate(frame_names):
        image_file_path = os.path.join(root_dir, 'image_2',
                                       frame_name + '.png')
        calib_file_path = os.path.join(root_dir, 'calib', frame_name + '.txt')
        velo_file_path = os.path.join(root_dir, 'velodyne',
                                      frame_name + '.bin')
        label_file_path = os.path.join(root_dir, 'label_2',
                                       frame_name + '.txt')

        example = tf.train.Example()
        feature = example.features.feature

        # Frame information.
        feature['image/source_id'].bytes_list.value[:] = [frame_name]

        # 2D image data. Use a context manager so the file handle is closed
        # deterministically (the previous code leaked it to the GC).
        with tf.gfile.Open(image_file_path) as image_file:
            encoded_image = image_file.read()
        feature['image/encoded'].bytes_list.value[:] = [encoded_image]
        image = np.array(Image.open(io.BytesIO(encoded_image)))
        # Validate with a real exception: `assert` is stripped under -O.
        if image.ndim != 3 or image.shape[2] != 3:
            raise ValueError(
                'Expected a 3-channel image for frame %s, got shape %s' %
                (frame_name, str(image.shape)))
        image_width = image.shape[1]
        image_height = image.shape[0]
        feature['image/width'].int64_list.value[:] = [image_width]
        feature['image/height'].int64_list.value[:] = [image_height]
        feature['image/format'].bytes_list.value[:] = ['PNG']

        # 3D velodyne point data, flattened to a single float list.
        velo_dict = kitti_data.LoadVeloBinFile(velo_file_path)
        point_list = velo_dict['xyz'].ravel().tolist()
        feature['pointcloud/xyz'].float_list.value[:] = point_list
        reflectance_list = velo_dict['reflectance'].ravel().tolist()
        feature[
            'pointcloud/reflectance'].float_list.value[:] = reflectance_list

        # Object data.
        calib_dict = kitti_data.LoadCalibrationFile(calib_file_path)
        if tf.gfile.Exists(label_file_path):
            # Load object labels for training data.
            object_dicts = kitti_data.LoadLabelFile(label_file_path)
            object_dicts = kitti_data.AnnotateKITTIObjectsWithBBox3D(
                object_dicts, calib_dict)
        else:
            # No object labels for test data. Use a list (not {}): the loop
            # below enumerates object dicts, and enumerating a non-empty dict
            # would yield its keys instead of the objects.
            object_dicts = []

        num_objects = len(object_dicts)
        xmins = [None] * num_objects
        xmaxs = [None] * num_objects
        ymins = [None] * num_objects
        ymaxs = [None] * num_objects
        labels = [None] * num_objects
        has_3d_infos = [None] * num_objects

        # 3D info.
        occlusions = [None] * num_objects
        truncations = [None] * num_objects
        xyzs = [None] * num_objects
        dim_xyzs = [None] * num_objects
        phis = [None] * num_objects

        for object_index, object_dict in enumerate(object_dicts):
            # 'bbox' is [xmin, ymin, xmax, ymax] in image coordinates.
            xmins[object_index] = object_dict['bbox'][0]
            xmaxs[object_index] = object_dict['bbox'][2]
            ymins[object_index] = object_dict['bbox'][1]
            ymaxs[object_index] = object_dict['bbox'][3]
            labels[object_index] = object_dict['type']
            has_3d_infos[object_index] = 1 if object_dict['has_3d_info'] else 0
            occlusions[object_index] = object_dict['occluded']
            truncations[object_index] = object_dict['truncated']
            # 'bbox3d' packs [x, y, z, dim_x, dim_y, dim_z, phi].
            xyzs[object_index] = object_dict['bbox3d'][:3]
            dim_xyzs[object_index] = object_dict['bbox3d'][3:6]
            phis[object_index] = object_dict['bbox3d'][6]

        feature['object/image/bbox/xmin'].float_list.value[:] = xmins
        feature['object/image/bbox/xmax'].float_list.value[:] = xmaxs
        feature['object/image/bbox/ymin'].float_list.value[:] = ymins
        feature['object/image/bbox/ymax'].float_list.value[:] = ymaxs
        feature['object/label'].bytes_list.value[:] = labels
        feature['object/has_3d_info'].int64_list.value[:] = has_3d_infos
        feature['object/occlusion'].int64_list.value[:] = occlusions
        feature['object/truncation'].float_list.value[:] = truncations
        xyzs = np.array(xyzs).ravel().tolist()
        feature['object/velo/bbox/xyz'].float_list.value[:] = xyzs
        dim_xyzs = np.array(dim_xyzs).ravel().tolist()
        feature['object/velo/bbox/dim_xyz'].float_list.value[:] = dim_xyzs
        feature['object/velo/bbox/phi'].float_list.value[:] = phis

        # Transformation matrices, stored as flattened row-major floats.
        velo_to_image_plane = kitti_data.VeloToImagePlaneTransformation(
            calib_dict)
        feature['transform/velo_to_image_plane'].float_list.value[:] = (
            velo_to_image_plane.ravel().tolist())
        velo_to_camera = kitti_data.VeloToCameraTransformation(calib_dict)
        feature['transform/velo_to_camera'].float_list.value[:] = (
            velo_to_camera.ravel().tolist())
        cam_to_velo = kitti_data.CameraToVeloTransformation(calib_dict)
        feature['transform/camera_to_velo'].float_list.value[:] = (
            cam_to_velo.ravel().tolist())

        examples.append(example)
        if frame_index % 100 == 0:
            logging.info('Processed frame %d of %d.', frame_index,
                         total_frames)

    return examples