Example #1
0
    def testAnnotateKITTIObjectsWithBBox3D(self):
        """Checks that loaded KITTI objects get a well-formed 'bbox3d' field."""
        calib = kitti_data.LoadCalibrationFile(self._calib_file)
        annotated = kitti_data.AnnotateKITTIObjectsWithBBox3D(
            kitti_data.LoadLabelFile(self._label_file), calib)

        # Every object should carry a 7-element 3D box: x, y, z, three
        # dimensions, and a heading angle.
        for obj in annotated:
            self.assertEqual(7, len(obj['bbox3d']))

        first = annotated[0]
        # atol=0.01 corresponds to a 1cm tolerance.
        self.assertAllClose(first['bbox3d'][:3], [69.72, -0.45, 0.58],
                            atol=0.01)
        self.assertAllClose(first['bbox3d'][3:],
                            [12.34, 2.63, 2.85, -0.01079633])
        self.assertAllEqual(first['has_3d_info'], True)

        # no 3D data
        self.assertAllEqual(annotated[3]['has_3d_info'], False)
Example #2
0
def _ReadObjectDataset(root_dir, frame_names):
    """Reads and parses KITTI dataset files into a list of TFExample protos.

    Args:
        root_dir: Path to the KITTI split directory containing the
            'image_2', 'calib', 'velodyne' and 'label_2' subdirectories.
        frame_names: Iterable of frame name strings (file basenames without
            extension) identifying the frames to read.

    Returns:
        A list of tf.train.Example protos, one per entry in frame_names.
    """
    examples = []

    total_frames = len(frame_names)
    for frame_index, frame_name in enumerate(frame_names):
        image_file_path = os.path.join(root_dir, 'image_2',
                                       frame_name + '.png')
        calib_file_path = os.path.join(root_dir, 'calib', frame_name + '.txt')
        velo_file_path = os.path.join(root_dir, 'velodyne',
                                      frame_name + '.bin')
        label_file_path = os.path.join(root_dir, 'label_2',
                                       frame_name + '.txt')

        example = tf.train.Example()
        feature = example.features.feature

        # Frame information. bytes_list requires bytes on Python 3; encoding
        # is a no-op value-wise on Python 2 str.
        feature['image/source_id'].bytes_list.value[:] = [
            frame_name.encode('utf-8')]

        # 2D image data. PNG is binary, so open in 'rb' mode; the context
        # manager closes the handle (the previous version leaked it and read
        # in text mode, which breaks on Python 3).
        with tf.gfile.Open(image_file_path, 'rb') as image_file:
            encoded_image = image_file.read()
        feature['image/encoded'].bytes_list.value[:] = [encoded_image]
        image = np.array(Image.open(io.BytesIO(encoded_image)))
        # KITTI camera images are expected to be 3-channel color.
        assert image.ndim == 3
        assert image.shape[2] == 3
        image_height, image_width = image.shape[0], image.shape[1]
        feature['image/width'].int64_list.value[:] = [image_width]
        feature['image/height'].int64_list.value[:] = [image_height]
        feature['image/format'].bytes_list.value[:] = [b'PNG']

        # 3D velodyne point data, flattened to x0,y0,z0,x1,y1,z1,...
        velo_dict = kitti_data.LoadVeloBinFile(velo_file_path)
        point_list = velo_dict['xyz'].ravel().tolist()
        feature['pointcloud/xyz'].float_list.value[:] = point_list
        reflectance_list = velo_dict['reflectance'].ravel().tolist()
        feature[
            'pointcloud/reflectance'].float_list.value[:] = reflectance_list

        # Object data.
        calib_dict = kitti_data.LoadCalibrationFile(calib_file_path)
        if tf.gfile.Exists(label_file_path):
            # Load object labels for training data.
            object_dicts = kitti_data.LoadLabelFile(label_file_path)
            object_dicts = kitti_data.AnnotateKITTIObjectsWithBBox3D(
                object_dicts, calib_dict)
        else:
            # No object labels for test data. Use an empty *list* — the code
            # below iterates this as a list of dicts (the old `{}` only
            # worked by accident because it stayed empty).
            object_dicts = []

        num_objects = len(object_dicts)
        xmins = [None] * num_objects
        xmaxs = [None] * num_objects
        ymins = [None] * num_objects
        ymaxs = [None] * num_objects
        labels = [None] * num_objects
        has_3d_infos = [None] * num_objects

        # 3D info.
        occlusions = [None] * num_objects
        truncations = [None] * num_objects
        xyzs = [None] * num_objects
        dim_xyzs = [None] * num_objects
        phis = [None] * num_objects

        for object_index, object_dict in enumerate(object_dicts):
            # 'bbox' is [xmin, ymin, xmax, ymax] in image coordinates.
            xmins[object_index] = object_dict['bbox'][0]
            xmaxs[object_index] = object_dict['bbox'][2]
            ymins[object_index] = object_dict['bbox'][1]
            ymaxs[object_index] = object_dict['bbox'][3]
            labels[object_index] = object_dict['type']
            has_3d_infos[object_index] = 1 if object_dict['has_3d_info'] else 0
            occlusions[object_index] = object_dict['occluded']
            truncations[object_index] = object_dict['truncated']
            # 'bbox3d' is [x, y, z, dim_x, dim_y, dim_z, phi].
            xyzs[object_index] = object_dict['bbox3d'][:3]
            dim_xyzs[object_index] = object_dict['bbox3d'][3:6]
            phis[object_index] = object_dict['bbox3d'][6]

        feature['object/image/bbox/xmin'].float_list.value[:] = xmins
        feature['object/image/bbox/xmax'].float_list.value[:] = xmaxs
        feature['object/image/bbox/ymin'].float_list.value[:] = ymins
        feature['object/image/bbox/ymax'].float_list.value[:] = ymaxs
        # Label strings must be bytes for bytes_list (Python 3 compatible).
        feature['object/label'].bytes_list.value[:] = [
            label.encode('utf-8') for label in labels]
        feature['object/has_3d_info'].int64_list.value[:] = has_3d_infos
        feature['object/occlusion'].int64_list.value[:] = occlusions
        feature['object/truncation'].float_list.value[:] = truncations
        xyzs = np.array(xyzs).ravel().tolist()
        feature['object/velo/bbox/xyz'].float_list.value[:] = xyzs
        dim_xyzs = np.array(dim_xyzs).ravel().tolist()
        feature['object/velo/bbox/dim_xyz'].float_list.value[:] = dim_xyzs
        feature['object/velo/bbox/phi'].float_list.value[:] = phis

        # Transformation matrices, stored row-major flattened.
        velo_to_image_plane = kitti_data.VeloToImagePlaneTransformation(
            calib_dict)
        feature['transform/velo_to_image_plane'].float_list.value[:] = (
            velo_to_image_plane.ravel().tolist())
        velo_to_camera = kitti_data.VeloToCameraTransformation(calib_dict)
        feature['transform/velo_to_camera'].float_list.value[:] = (
            velo_to_camera.ravel().tolist())
        cam_to_velo = kitti_data.CameraToVeloTransformation(calib_dict)
        feature['transform/camera_to_velo'].float_list.value[:] = (
            cam_to_velo.ravel().tolist())

        examples.append(example)
        if frame_index % 100 == 0:
            logging.info('Processed frame %d of %d.', frame_index,
                         total_frames)

    return examples