Example #1

    @classmethod
    def setUpClass(cls):
        cls.system = mock_types.MockSystem()
        cls.image_source = mock_types.MockImageSource()

        image_data = [
            np.random.normal(128, 20, size=(10, 10)).astype(np.uint8)
            for _ in range(cls.num_images)
        ]
        cls.images = [
            Image(
                pixels=pixels,
                metadata=make_metadata(
                    pixels=pixels,
                    source_type=ImageSourceType.SYNTHETIC,
                    camera_pose=Transform(
                        (idx * 15, idx, 0),
                        tf3d.quaternions.axangle2quat((1, 2, 3), 5 * idx * np.pi / (2 * cls.num_images)), w_first=True
                    ),
                    intrinsics=CameraIntrinsics(
                        width=10, height=10,
                        fx=5, fy=5,
                        cx=5, cy=5
                    )
                )
            )
            for idx, pixels in enumerate(image_data)
        ]
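
For context: setUpClass is the class-level fixture hook from unittest, so this snippet assumes an enclosing TestCase subclass that defines num_images. A minimal sketch of that scaffolding (the class name and image count here are hypothetical, not part of the original):

import unittest

class TestWithMockImages(unittest.TestCase):  # hypothetical name
    num_images = 10  # hypothetical count; the fixture only needs this attribute to exist

    @classmethod
    def setUpClass(cls):
        ...  # body as in the snippet above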
Example #2

def make_image_sequence(timestep, width, height, length):
    images = []
    for _ in range(length):
        pixels = np.random.randint(0, 256, size=(height, width, 3), dtype=np.uint8)  # upper bound is exclusive
        image = Image(
            pixels=pixels,
            metadata=imeta.make_metadata(
                pixels,
                source_type=imeta.ImageSourceType.SYNTHETIC,
                intrinsics=CameraIntrinsics(800, 600, 550.2, 750.2, 400, 300),
            ),
            additional_metadata={'test': True}
        )
        image.save()
        images.append(image)
    sequence = ImageCollection(
        images=images,
        timestamps=[idx * timestep for idx in range(length)],
        sequence_type=ImageSequenceType.SEQUENTIAL
    )
    sequence.save()
    return sequence
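
A minimal usage sketch, assuming the image manager and database connection are already configured (the frame rate, resolution, and length are arbitrary):

# Build and save a ten-frame synthetic sequence at roughly 30 fps.
sequence = make_image_sequence(timestep=1 / 30, width=64, height=48, length=10)
assert sequence.sequence_type == ImageSequenceType.SEQUENTIAL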
Example #3
def import_sequence(root_folder: Path, left_path: Path, right_path: Path,
                    depth_quality: DepthNoiseQuality = DepthNoiseQuality.KINECT_NOISE) -> ImageCollection:
    """
    Import the sequence, as a bunch of stereo images, and then organised into an ImageCollection.
    ImageCollection and StereoImage objects are saved.

    :param root_folder: The root folder to import from, containing the timestamps and settings files
    :param left_path: The path to the left image sequences
    :param right_path: The path to the right image sequences
    :param depth_quality: The quality level for the noisy depth images, which are generated during import
    :return: The imported image collection
    """
    # Read the timestamps and the generator settings
    # These are saved from python, so no special loading is needed
    with (root_folder / 'settings.json').open('r') as fp:
        settings = json_load(fp)
    with (root_folder / 'timestamps.json').open('r') as fp:
        timestamps = json_load(fp)

    # Read the camera settings from file
    left_camera_intrinsics = read_camera_intrinsics(left_path / '_camera_settings.json')
    right_camera_intrinsics = read_camera_intrinsics(right_path / '_camera_settings.json')
    # left_object_labels = read_object_classes(left_path / '_object_settings.json')
    # right_object_labels = read_object_classes(right_path / '_object_settings.json')

    max_img_id = min(
        find_max_img_id(lambda idx: left_path / IMG_TEMPLATE.format(idx)),
        find_max_img_id(lambda idx: right_path / IMG_TEMPLATE.format(idx)),
    )
    if len(timestamps) != max_img_id + 1:
        raise RuntimeError(f"Found {max_img_id + 1} images but {len(timestamps)} timestamps, "
                           f"cannot associate.")

    # Parse the meta-information recorded by the generator
    sequence_name = root_folder.name
    (
        trajectory_id, environment_type, light_level, time_of_day, simulation_world,
        lighting_model, texture_mipmap_bias, normal_maps_enabled, roughness_enabled, min_object_size,
        geometry_decimation
    ) = parse_settings(settings)

    # Import all the images
    images = []
    image_group = sequence_name
    origin = None
    # Open the image manager for writing once, so that we're not constantly opening and closing it with each image
    with arvet.database.image_manager.get().get_group(image_group, allow_write=True):
        for img_idx in range(max_img_id + 1):
            logging.getLogger(__name__).info(f"Loading image {img_idx}...")
            # Read the raw data for the left image
            left_frame_data = read_json(left_path / DATA_TEMPLATE.format(img_idx))
            left_pixels = image_utils.read_colour(left_path / IMG_TEMPLATE.format(img_idx))
            # left_label_image = image_utils.read_colour(left_path / INSTANCE_TEMPLATE.format(img_idx))
            left_true_depth = load_depth_image(left_path / DEPTH_TEMPLATE.format(img_idx))

            # Read the raw data for the right image
            right_frame_data = read_json(right_path / DATA_TEMPLATE.format(img_idx))
            right_pixels = image_utils.read_colour(right_path / IMG_TEMPLATE.format(img_idx))
            # right_label_image = image_utils.read_colour(right_path / INSTANCE_TEMPLATE.format(img_idx))
            right_true_depth = load_depth_image(right_path / DEPTH_TEMPLATE.format(img_idx))

            # Ensure all images are c_contiguous
            if not left_pixels.flags.c_contiguous:
                left_pixels = np.ascontiguousarray(left_pixels)
            # if not left_label_image.flags.c_contiguous:
            #     left_label_image = np.ascontiguousarray(left_label_image)
            if not left_true_depth.flags.c_contiguous:
                left_true_depth = np.ascontiguousarray(left_true_depth)
            if not right_pixels.flags.c_contiguous:
                right_pixels = np.ascontiguousarray(right_pixels)
            # if not right_label_image.flags.c_contiguous:
            #     right_label_image = np.ascontiguousarray(right_label_image)
            if not right_true_depth.flags.c_contiguous:
                right_true_depth = np.ascontiguousarray(right_true_depth)

            # Extract the poses
            left_camera_pose = read_camera_pose(left_frame_data)
            right_camera_pose = read_camera_pose(right_frame_data)

            # If we had object data, we would extract labels for each object as well,
            # but the object labelling is currently broken, so it is disabled
            # if len(left_object_labels) > 0:
            #     left_labelled_objects = find_labelled_objects(left_label_image, left_frame_data, left_object_labels)
            # else:
            #     left_labelled_objects = []
            # if len(right_object_labels) > 0:
            #     right_labelled_objects = find_labelled_objects(right_label_image, right_frame_data, right_object_labels)
            # else:
            #     right_labelled_objects = []
            left_labelled_objects = []
            right_labelled_objects = []

            # Compute a noisy depth image
            noisy_depth = create_noisy_depth_image(
                left_true_depth=left_true_depth,
                right_true_depth=right_true_depth,
                camera_intrinsics=left_camera_intrinsics,
                right_camera_relative_pose=left_camera_pose.find_relative(right_camera_pose),
                quality_level=depth_quality
            )

            # Re-centre the camera poses relative to the first frame
            if origin is None:
                origin = left_camera_pose
            left_camera_pose = origin.find_relative(left_camera_pose)
            right_camera_pose = origin.find_relative(right_camera_pose)

            left_metadata = imeta.make_metadata(
                pixels=left_pixels,
                depth=left_true_depth,
                camera_pose=left_camera_pose,
                intrinsics=left_camera_intrinsics,
                source_type=imeta.ImageSourceType.SYNTHETIC,
                environment_type=environment_type,
                light_level=light_level,
                time_of_day=time_of_day,
                simulation_world=simulation_world,
                lighting_model=lighting_model,
                texture_mipmap_bias=texture_mipmap_bias,
                normal_maps_enabled=normal_maps_enabled,
                roughness_enabled=roughness_enabled,
                geometry_decimation=geometry_decimation,
                minimum_object_volume=min_object_size,
                labelled_objects=left_labelled_objects
            )
            right_metadata = imeta.make_right_metadata(
                pixels=right_pixels,
                depth=right_true_depth,
                camera_pose=right_camera_pose,
                intrinsics=right_camera_intrinsics,
                labelled_objects=right_labelled_objects,
                left_metadata=left_metadata
            )
            image = StereoImage(
                pixels=left_pixels,
                right_pixels=right_pixels,
                depth=noisy_depth,
                true_depth=left_true_depth,
                right_true_depth=right_true_depth,
                image_group=image_group,
                metadata=left_metadata,
                right_metadata=right_metadata,
            )
            try:
                image.save()
            except KeyError:
                logging.getLogger(__name__).error(f"Key error while saving image {img_idx} in sequence {sequence_name}, "
                                                  f"read from {left_path / IMG_TEMPLATE.format(img_idx)}")
                raise
            images.append(image)

    # Create and save the image collection
    collection = ImageCollection(
        images=images,
        image_group=sequence_name,
        timestamps=timestamps,
        sequence_type=ImageSequenceType.SEQUENTIAL,
        dataset="generated-SLAM-data",
        sequence_name=sequence_name,
        trajectory_id=trajectory_id
    )
    collection.save()
    return collection
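
A minimal invocation sketch; all paths here are hypothetical, the layout just has to match what the loader expects (settings.json and timestamps.json in the root, plus the left and right image folders):

from pathlib import Path

root = Path('/data/generated/seq_000')  # hypothetical sequence root
collection = import_sequence(
    root_folder=root,
    left_path=root / 'left',    # hypothetical folder names
    right_path=root / 'right',
    depth_quality=DepthNoiseQuality.KINECT_NOISE,
)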
Example #4
    def create_frame(self, time: float) -> Image:
        img_shape = (self.height, self.width, 3) if self.colour else (self.height, self.width)
        frame = np.zeros(img_shape, dtype=np.uint8)
        depth = None
        if self.mode is ImageMode.RGBD:
            depth = (1000 + 2 * len(self.stars)) * np.ones(
                (self.height, self.width), dtype=np.float64)
        f = self.focal_length
        cx = frame.shape[1] / 2
        cy = frame.shape[0] / 2

        for star in self.stars:
            x, y, z = star['pos']
            x -= self.speed * time
            if z <= 0:
                break  # Stars are sorted by z value, so once they're past the camera, stop.

            left = int(np.round(f * ((x - star['width'] / 2) / z) + cx))
            right = int(np.round(f * ((x + star['width'] / 2) / z) + cx))

            top = int(np.round(f * ((y - star['height'] / 2) / z) + cy))
            bottom = int(np.round(f * ((y + star['height'] / 2) / z) + cy))

            left = max(0, min(frame.shape[1], left))
            right = max(0, min(frame.shape[1], right))
            top = max(0, min(frame.shape[0], top))
            bottom = max(0, min(frame.shape[0], bottom))

            frame[top:bottom, left:right] = star['colour']
            if depth is not None:
                depth[top:bottom, left:right] = z

        metadata = imeta.make_metadata(
            pixels=frame,
            depth=depth,
            source_type=imeta.ImageSourceType.SYNTHETIC,
            camera_pose=Transform(location=[time * self.speed, 0, 0],
                                  rotation=[0, 0, 0, 1]),
            intrinsics=CameraIntrinsics(width=frame.shape[1],
                                        height=frame.shape[0],
                                        fx=f,
                                        fy=f,
                                        cx=cx,
                                        cy=cy))

        # If we're building a stereo image, make the right image
        if self.mode is ImageMode.STEREO:
            right_frame = np.zeros(img_shape, dtype=np.uint8)
            for star in self.stars:
                x, y, z = star['pos']
                x -= self.stereo_offset + self.speed * time
                if z <= 0:
                    break

                left = int(np.round(f * ((x - star['width'] / 2) / z) + cx))
                right = int(np.round(f * ((x + star['width'] / 2) / z) + cx))

                top = int(np.round(f * ((y - star['height'] / 2) / z) + cy))
                bottom = int(np.round(f * ((y + star['height'] / 2) / z) + cy))

                left = max(0, min(frame.shape[1], left))
                right = max(0, min(frame.shape[1], right))
                top = max(0, min(frame.shape[0], top))
                bottom = max(0, min(frame.shape[0], bottom))

                right_frame[top:bottom, left:right] = star['colour']
            right_metadata = imeta.make_right_metadata(
                pixels=right_frame,
                left_metadata=metadata,
                source_type=imeta.ImageSourceType.SYNTHETIC,
                camera_pose=Transform(
                    location=[time * self.speed, -1 * self.stereo_offset, 0],
                    rotation=[0, 0, 0, 1]),
                intrinsics=CameraIntrinsics(width=frame.shape[1],
                                            height=frame.shape[0],
                                            fx=f,
                                            fy=f,
                                            cx=cx,
                                            cy=cy))
            return StereoImage(pixels=frame,
                               image_group='test',
                               metadata=metadata,
                               right_pixels=right_frame,
                               right_metadata=right_metadata)

        if depth is not None:
            return Image(pixels=frame,
                         image_group='test',
                         depth=depth,
                         metadata=metadata)
        return Image(pixels=frame, image_group='test', metadata=metadata)
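
The star rectangles above are placed with a standard pinhole projection: a point at (x, y, z) in camera space maps to pixel (u, v) = (f * x / z + cx, f * y / z + cy). A small worked sketch of that mapping, with arbitrary illustrative values:

import numpy as np

f, cx, cy = 100.0, 320.0, 240.0   # focal length and principal point
x, y, z = 1.0, 0.5, 10.0          # point in camera coordinates
u = f * (x / z) + cx              # 330.0
v = f * (y / z) + cy              # 245.0
print(int(np.round(u)), int(np.round(v)))  # 330 245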
Example #5
def import_dataset(root_folder, sequence_number, **_):
    """
    Load a KITTI image sequences into the database.
    :return:
    """
    sequence_number = int(sequence_number)
    if not 0 <= sequence_number < 11:
        raise ValueError("Cannot import sequence {0}, "
                         "ground-truth poses are only available for sequences 0-10".format(sequence_number))
    root_folder = find_root(root_folder, sequence_number)
    data = pykitti.odometry(root_folder, sequence="{0:02}".format(sequence_number))

    # dataset.calib:      Calibration data are accessible as a named tuple
    # dataset.timestamps: Timestamps are parsed into a list of timedelta objects
    # dataset.poses:      Generator to load ground truth poses T_w_cam0
    # dataset.camN:       Generator to load individual images from camera N
    # dataset.gray:       Generator to load monochrome stereo pairs (cam0, cam1)
    # dataset.rgb:        Generator to load RGB stereo pairs (cam2, cam3)
    # dataset.velo:       Generator to load velodyne scans as [x,y,z,reflectance]
    image_group = f"KITTI_{sequence_number:06}"
    images = []
    timestamps = []
    with arvet.database.image_manager.get().get_group(image_group, allow_write=True):
        for left_image, right_image, timestamp, pose in zip(data.cam2, data.cam3, data.timestamps, data.poses):
            left_image = np.array(left_image)
            right_image = np.array(right_image)
            camera_pose = make_camera_pose(pose)
            # camera pose is for cam0, we want cam2, which is 6cm (0.06m) to the left
            # Except that we don't need to control for that, since we want to be relative to the first pose anyway
            # camera_pose = camera_pose.find_independent(tf.Transform(location=(0, 0.06, 0), rotation=(0, 0, 0, 1),
            #                                                         w_first=False))
            # Stereo offset is 0.54m (http://www.cvlibs.net/datasets/kitti/setup.php)
            right_camera_pose = camera_pose.find_independent(Transform(location=(0, -0.54, 0), rotation=(0, 0, 0, 1),
                                                                       w_first=False))
            camera_intrinsics = CameraIntrinsics(
                height=left_image.shape[0],
                width=left_image.shape[1],
                fx=data.calib.K_cam2[0, 0],
                fy=data.calib.K_cam2[1, 1],
                cx=data.calib.K_cam2[0, 2],
                cy=data.calib.K_cam2[1, 2])
            right_camera_intrinsics = CameraIntrinsics(
                height=right_image.shape[0],
                width=right_image.shape[1],
                fx=data.calib.K_cam3[0, 0],
                fy=data.calib.K_cam3[1, 1],
                cx=data.calib.K_cam3[0, 2],
                cy=data.calib.K_cam3[1, 2])
            left_metadata = imeta.make_metadata(
                pixels=left_image,
                camera_pose=camera_pose,
                intrinsics=camera_intrinsics,
                source_type=imeta.ImageSourceType.REAL_WORLD,
                environment_type=imeta.EnvironmentType.OUTDOOR_URBAN,
                light_level=imeta.LightingLevel.WELL_LIT,
                time_of_day=imeta.TimeOfDay.AFTERNOON,
            )
            right_metadata = imeta.make_right_metadata(
                pixels=right_image,
                left_metadata=left_metadata,
                camera_pose=right_camera_pose,
                intrinsics=right_camera_intrinsics
            )
            image = StereoImage(
                pixels=left_image,
                right_pixels=right_image,
                image_group=image_group,
                metadata=left_metadata,
                right_metadata=right_metadata
            )
            image.save()
            images.append(image)
            timestamps.append(timestamp.total_seconds())

    # Create and save the image collection
    collection = ImageCollection(
        images=images,
        timestamps=timestamps,
        sequence_type=ImageSequenceType.SEQUENTIAL,
        dataset='KITTI',
        sequence_name="{0:02}".format(sequence_number),
        trajectory_id="KITTI_{0:02}".format(sequence_number)
    )
    collection.save()
    return collection
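
A minimal invocation sketch; the dataset root is hypothetical and should contain the usual KITTI odometry layout that pykitti expects:

# Hypothetical path to a KITTI odometry download.
collection = import_dataset('/data/kitti/odometry', sequence_number=0)
print(collection.sequence_name)  # '00'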
Example #6
def import_dataset(root_folder, dataset_name, **_):
    """
    Load a TUM RGB-D sequence into the database.


    :return:
    """
    root_folder = Path(root_folder)

    # Step 0: Check the root folder to see if it needs to be extracted from a tarfile
    delete_when_done = None
    if not root_folder.is_dir():
        if (root_folder.parent / dataset_name).is_dir():
            # The root was a tarball, but the extracted data already exists; use that as the root
            root_folder = root_folder.parent / dataset_name
        else:
            candidate_tar_file = root_folder.parent / (dataset_name + '.tgz')
            if candidate_tar_file.is_file() and tarfile.is_tarfile(
                    candidate_tar_file):
                # Root is actually a tarfile; extract it. find_files will handle the folder structure
                with tarfile.open(candidate_tar_file) as tar_fp:
                    tar_fp.extractall(root_folder.parent / dataset_name)
                root_folder = root_folder.parent / dataset_name
                delete_when_done = root_folder
            else:
                # Could find neither a dir nor a tarfile to extract from
                raise NotADirectoryError(
                    "'{0}' is not a directory".format(root_folder))

    # Step 1: Find the relevant metadata files
    root_folder, rgb_path, depth_path, trajectory_path = find_files(
        root_folder)

    # Step 2: Read the metadata from them
    image_files = read_image_filenames(rgb_path)
    trajectory = read_trajectory(trajectory_path, image_files.keys())
    depth_files = read_image_filenames(depth_path)

    # Step 3: Associate the different data types by timestamp
    all_metadata = associate_data(image_files, trajectory, depth_files)

    # Step 4: Load the images from the metadata
    first_timestamp = None
    image_group = dataset_name
    images = []
    timestamps = []
    with arvet.database.image_manager.get().get_group(image_group,
                                                      allow_write=True):
        for timestamp, image_file, camera_pose, depth_file in all_metadata:
            # Re-zero the timestamps
            if first_timestamp is None:
                first_timestamp = timestamp
            timestamp = (timestamp - first_timestamp)

            rgb_data = image_utils.read_colour(
                os.path.join(root_folder, image_file))
            depth_data = image_utils.read_depth(
                os.path.join(root_folder, depth_file))
            depth_data = depth_data / 5000  # Re-scale depth to meters
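            # (TUM RGB-D depth maps are 16-bit PNGs scaled so that a pixel value
            # of 5000 corresponds to 1 metre, hence the division by 5000 above.)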
            camera_intrinsics = get_camera_intrinsics(root_folder)

            metadata = imeta.make_metadata(
                pixels=rgb_data,
                depth=depth_data,
                camera_pose=camera_pose,
                intrinsics=camera_intrinsics,
                source_type=imeta.ImageSourceType.REAL_WORLD,
                environment_type=environment_types.get(
                    dataset_name, imeta.EnvironmentType.INDOOR_CLOSE),
                light_level=imeta.LightingLevel.WELL_LIT,
                time_of_day=imeta.TimeOfDay.DAY,
            )
            image = Image(pixels=rgb_data,
                          depth=depth_data,
                          image_group=image_group,
                          metadata=metadata)
            image.save()
            images.append(image)
            timestamps.append(timestamp)

    # Create and save the image collection
    collection = ImageCollection(images=images,
                                 timestamps=timestamps,
                                 sequence_type=ImageSequenceType.SEQUENTIAL,
                                 dataset='TUM RGB-D',
                                 sequence_name=dataset_name,
                                 trajectory_id=dataset_name)
    collection.save()

    if delete_when_done is not None and delete_when_done.exists():
        # We're done and need to clean up after ourselves
        shutil.rmtree(delete_when_done)

    return collection
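
A minimal invocation sketch; the path is hypothetical, and rgbd_dataset_freiburg1_xyz is one of the published TUM RGB-D sequence names. The root may be either the extracted directory or sit next to a matching .tgz:

collection = import_dataset('/data/tum/rgbd_dataset_freiburg1_xyz',
                            dataset_name='rgbd_dataset_freiburg1_xyz')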
Example #7
def import_dataset(root_folder, dataset_name, **_):
    """
    Load an Autonomous Systems Lab dataset into the database.
    See http://projects.asl.ethz.ch/datasets/doku.php?id=kmavvisualinertialdatasets#downloads

    Some information drawn from the ethz_asl dataset tools, see: https://github.com/ethz-asl/dataset_tools
    :param root_folder: The body folder, containing body.yaml (i.e. the extracted mav0 folder)
    :param dataset_name: The name of the dataset, see the manager for the list of valid values.
    :return: The imported image collection
    """
    if not os.path.isdir(root_folder):
        raise NotADirectoryError(
            "'{0}' is not a directory".format(root_folder))

    # Step 1: Find the various files containing the data (that's a 6-element tuple unpack for the return value)
    (root_folder, left_rgb_path, left_camera_intrinsics_path, right_rgb_path,
     right_camera_intrinsics_path, trajectory_path) = find_files(root_folder)

    # Step 2: Read the meta-information from the files (that's a 6-element tuple unpack for the return value)
    left_image_files = read_image_filenames(left_rgb_path)
    left_extrinsics, left_intrinsics = get_camera_calibration(
        left_camera_intrinsics_path)
    right_image_files = read_image_filenames(right_rgb_path)
    right_extrinsics, right_intrinsics = get_camera_calibration(
        right_camera_intrinsics_path)
    trajectory = read_trajectory(trajectory_path, left_image_files.keys())

    # Step 3: Create stereo rectification matrices from the intrinsics
    left_x, left_y, left_intrinsics, right_x, right_y, right_intrinsics = rectify(
        left_extrinsics, left_intrinsics, right_extrinsics, right_intrinsics)

    # Convert the extrinsics to the correct coordinate convention. This has to happen after rectification
    left_extrinsics = fix_coordinates(left_extrinsics)
    right_extrinsics = fix_coordinates(right_extrinsics)

    # Step 4: Associate the different data types by timestamp. Trajectory goes last because it has more entries than the image lists.
    all_metadata = associate_data(left_image_files, right_image_files,
                                  trajectory)

    # Step 5: Load the images from the metadata
    first_timestamp = None
    image_group = dataset_name
    images = []
    timestamps = []
    with arvet.database.image_manager.get().get_group(image_group,
                                                      allow_write=True):
        for timestamp, left_image_file, right_image_file, robot_pose in all_metadata:
            # Timestamps are in POSIX nanoseconds, re-zero them to the start of the dataset, and scale to seconds
            if first_timestamp is None:
                first_timestamp = timestamp
            timestamp = (timestamp - first_timestamp) / 1e9

            left_data = image_utils.read_colour(
                os.path.join(root_folder, 'cam0', 'data', left_image_file))
            right_data = image_utils.read_colour(
                os.path.join(root_folder, 'cam1', 'data', right_image_file))

            # Error check the loaded image data
            if left_data is None or left_data.size == 0:
                logging.getLogger(__name__).warning(
                    "Could not read left image \"{0}\", result is empty. Skipping."
                    .format(
                        os.path.join(root_folder, 'cam0', 'data',
                                     left_image_file)))
                continue
            if right_data is None or right_data.size == 0:
                logging.getLogger(__name__).warning(
                    "Could not read right image \"{0}\", result is empty. Skipping."
                    .format(
                        os.path.join(root_folder, 'cam1', 'data',
                                     right_image_file)))
                continue

            left_data = cv2.remap(left_data, left_x, left_y, cv2.INTER_LINEAR)
            right_data = cv2.remap(right_data, right_x, right_y,
                                   cv2.INTER_LINEAR)

            left_pose = robot_pose.find_independent(left_extrinsics)
            right_pose = robot_pose.find_independent(right_extrinsics)

            left_metadata = imeta.make_metadata(
                pixels=left_data,
                camera_pose=left_pose,
                intrinsics=left_intrinsics,
                source_type=imeta.ImageSourceType.REAL_WORLD,
                environment_type=environment_types.get(
                    dataset_name, imeta.EnvironmentType.INDOOR_CLOSE),
                light_level=imeta.LightingLevel.WELL_LIT,
                time_of_day=imeta.TimeOfDay.DAY,
            )
            right_metadata = imeta.make_right_metadata(
                pixels=right_data,
                left_metadata=left_metadata,
                camera_pose=right_pose,
                intrinsics=right_intrinsics)
            image = StereoImage(pixels=left_data,
                                right_pixels=right_data,
                                image_group=image_group,
                                metadata=left_metadata,
                                right_metadata=right_metadata)
            image.save()
            images.append(image)
            timestamps.append(timestamp)

    # Create and save the image collection
    collection = ImageCollection(images=images,
                                 timestamps=timestamps,
                                 sequence_type=ImageSequenceType.SEQUENTIAL,
                                 dataset='EuRoC MAV',
                                 sequence_name=dataset_name,
                                 trajectory_id=dataset_name)
    collection.save()
    return collection
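
A minimal invocation sketch; the path is hypothetical, and MH_01_easy is one of the published EuRoC MAV sequences. The root should be the extracted mav0 folder containing body.yaml:

collection = import_dataset('/data/euroc/MH_01_easy/mav0',
                            dataset_name='MH_01_easy')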