def testVeloToImagePlaneTransformation(self):
  """Checks that projected 3D box corners match the labeled 2D bbox."""
  kitti_objects = kitti_data.LoadLabelFile(self._label_file)
  calib = kitti_data.LoadCalibrationFile(self._calib_file)
  # Only check the first object in the frame.
  first_obj = kitti_objects[0]
  cam_to_velo = kitti_data.CameraToVeloTransformation(calib)
  bbox3d = kitti_data._KITTIObjectToBBox3D(first_obj, cam_to_velo)
  # Convert to the 8 corner points in our canonical space.
  corner_tensor = geometry.BBoxCorners(
      tf.constant([[bbox3d]], dtype=tf.float32))
  with self.session():
    corners = self.evaluate(corner_tensor).reshape([8, 3])
    # Append homogeneous coordinates so the projection matrix applies.
    corners = np.concatenate([corners, np.ones((8, 1))], axis=-1)
    # Project from velo space onto the image plane.
    velo_to_img = kitti_data.VeloToImagePlaneTransformation(calib)
    corners = np.dot(corners, velo_to_img.T)
    # Perspective divide to recover pixel locations.
    corners[:, 0] /= corners[:, 2]
    corners[:, 1] /= corners[:, 2]
    # Axis-aligned hull of the projected corners, as
    # [left, top, right, bottom].
    bbox = [
        np.min(corners[:, 0]),
        np.min(corners[:, 1]),
        np.max(corners[:, 0]),
        np.max(corners[:, 1]),
    ]
    # This should correspond to the GT bbox in first_obj['bbox'].
    # We use atol=0.1 since they should be close to the nearest pixel.
    self.assertAllClose(bbox, first_obj['bbox'], atol=0.1)
def _ReadObjectDataset(root_dir, frame_names):
  """Reads and parses KITTI dataset files into a list of TFExample protos.

  Args:
    root_dir: Path to a KITTI object dataset split; expected to contain the
      'image_2', 'calib', 'velodyne' and (for training data) 'label_2'
      subdirectories.
    frame_names: List of frame names (file basenames without extension) to
      read.

  Returns:
    A list of tf.train.Example protos, one per frame in `frame_names`.
  """
  examples = []
  total_frames = len(frame_names)
  for frame_index, frame_name in enumerate(frame_names):
    example = tf.train.Example()
    feature = example.features.feature

    # Frame information.
    feature['image/source_id'].bytes_list.value[:] = [frame_name]

    # 2D image data.
    _AddImageFeatures(
        feature, os.path.join(root_dir, 'image_2', frame_name + '.png'))

    # 3D velodyne point data.
    _AddPointCloudFeatures(
        feature, os.path.join(root_dir, 'velodyne', frame_name + '.bin'))

    # Object data.
    calib_dict = kitti_data.LoadCalibrationFile(
        os.path.join(root_dir, 'calib', frame_name + '.txt'))
    _AddObjectFeatures(
        feature, os.path.join(root_dir, 'label_2', frame_name + '.txt'),
        calib_dict)

    # Transformation matrices.
    _AddTransformationFeatures(feature, calib_dict)

    examples.append(example)
    if frame_index % 100 == 0:
      logging.info('Processed frame %d of %d.', frame_index, total_frames)
  return examples


def _AddImageFeatures(feature, image_file_path):
  """Adds the encoded 2D image and its dimensions to `feature`."""
  # Open in binary mode: PNG bytes must not go through text decoding.  The
  # context manager also guarantees the file handle is closed.
  with tf.gfile.Open(image_file_path, 'rb') as f:
    encoded_image = f.read()
  feature['image/encoded'].bytes_list.value[:] = [encoded_image]
  image = np.array(Image.open(io.BytesIO(encoded_image)))
  # Expect an (H, W, 3) RGB image.
  assert image.ndim == 3
  assert image.shape[2] == 3
  feature['image/width'].int64_list.value[:] = [image.shape[1]]
  feature['image/height'].int64_list.value[:] = [image.shape[0]]
  feature['image/format'].bytes_list.value[:] = ['PNG']


def _AddPointCloudFeatures(feature, velo_file_path):
  """Adds 3D velodyne point xyz and reflectance data to `feature`."""
  velo_dict = kitti_data.LoadVeloBinFile(velo_file_path)
  feature['pointcloud/xyz'].float_list.value[:] = (
      velo_dict['xyz'].ravel().tolist())
  feature['pointcloud/reflectance'].float_list.value[:] = (
      velo_dict['reflectance'].ravel().tolist())


def _AddObjectFeatures(feature, label_file_path, calib_dict):
  """Adds per-object 2D/3D bounding box label data to `feature`."""
  if tf.gfile.Exists(label_file_path):
    # Load object labels for training data.
    object_dicts = kitti_data.LoadLabelFile(label_file_path)
    object_dicts = kitti_data.AnnotateKITTIObjectsWithBBox3D(
        object_dicts, calib_dict)
  else:
    # No object labels for test data.
    object_dicts = []

  xmins = []
  xmaxs = []
  ymins = []
  ymaxs = []
  labels = []
  has_3d_infos = []
  occlusions = []
  truncations = []
  xyzs = []
  dim_xyzs = []
  phis = []
  for object_dict in object_dicts:
    # 2D bbox is stored as [left, top, right, bottom].
    xmins.append(object_dict['bbox'][0])
    ymins.append(object_dict['bbox'][1])
    xmaxs.append(object_dict['bbox'][2])
    ymaxs.append(object_dict['bbox'][3])
    labels.append(object_dict['type'])
    has_3d_infos.append(1 if object_dict['has_3d_info'] else 0)
    occlusions.append(object_dict['occluded'])
    truncations.append(object_dict['truncated'])
    # 3D info: bbox3d packs [x, y, z, dim_x, dim_y, dim_z, phi].
    xyzs.append(object_dict['bbox3d'][:3])
    dim_xyzs.append(object_dict['bbox3d'][3:6])
    phis.append(object_dict['bbox3d'][6])

  feature['object/image/bbox/xmin'].float_list.value[:] = xmins
  feature['object/image/bbox/xmax'].float_list.value[:] = xmaxs
  feature['object/image/bbox/ymin'].float_list.value[:] = ymins
  feature['object/image/bbox/ymax'].float_list.value[:] = ymaxs
  feature['object/label'].bytes_list.value[:] = labels
  feature['object/has_3d_info'].int64_list.value[:] = has_3d_infos
  feature['object/occlusion'].int64_list.value[:] = occlusions
  feature['object/truncation'].float_list.value[:] = truncations
  feature['object/velo/bbox/xyz'].float_list.value[:] = (
      np.array(xyzs).ravel().tolist())
  feature['object/velo/bbox/dim_xyz'].float_list.value[:] = (
      np.array(dim_xyzs).ravel().tolist())
  feature['object/velo/bbox/phi'].float_list.value[:] = phis


def _AddTransformationFeatures(feature, calib_dict):
  """Adds calibration transformation matrices to `feature`."""
  velo_to_image_plane = kitti_data.VeloToImagePlaneTransformation(calib_dict)
  feature['transform/velo_to_image_plane'].float_list.value[:] = (
      velo_to_image_plane.ravel().tolist())
  velo_to_camera = kitti_data.VeloToCameraTransformation(calib_dict)
  feature['transform/velo_to_camera'].float_list.value[:] = (
      velo_to_camera.ravel().tolist())
  cam_to_velo = kitti_data.CameraToVeloTransformation(calib_dict)
  feature['transform/camera_to_velo'].float_list.value[:] = (
      cam_to_velo.ravel().tolist())