Example #1
    def test_searches_recursively(self):
        # Create a deeply nested folder structure
        base_root = Path(self.temp_folder)
        true_sequence = 3, 0, 2
        true_path = ''
        for lvl1 in range(5):
            lvl1_path = base_root / "folder_{0}".format(lvl1)
            for lvl2 in range(4):
                lvl2_path = lvl1_path / "folder_{0}".format(lvl2)
                for lvl3 in range(3):
                    path = lvl2_path / "folder_{0}".format(lvl3)
                    path.mkdir(parents=True, exist_ok=True)
                    if (lvl1, lvl2, lvl3) == true_sequence:
                        true_path = path
                        for filename in self.required_files:
                            (path / filename).touch()
                    else:
                        (path / 'decoy.txt').touch()

        # Search that structure for the one folder that has all we need
        result = tum_loader.find_files(base_root)
        self.assertEqual((
            true_path,
            true_path / 'rgb.txt',
            true_path / 'depth.txt',
            true_path / 'groundtruth.txt',
        ), result)

        # Clean up after ourselves
        shutil.rmtree(base_root)
Example #2
    def test_needs_all_elements(self):
        root_path = Path(self.temp_folder) / 'root'
        root_path.mkdir(parents=True, exist_ok=True)
        for missing_idx in range(len(self.required_files)):
            for filename_idx in range(len(self.required_files)):
                file_path = root_path / self.required_files[filename_idx]
                if filename_idx != missing_idx and not file_path.exists():
                    # Create all the required files except one
                    file_path.touch()
                elif file_path.exists():
                    # Remove the file
                    file_path.unlink()

            with self.assertRaises(FileNotFoundError):
                tum_loader.find_files(root_path)

        # Clean up after ourselves
        shutil.rmtree(root_path)
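
Taken together, these two tests describe the contract of tum_loader.find_files without showing its body: it searches the given root recursively, returns a 4-tuple of the matching folder plus the paths to rgb.txt, depth.txt and groundtruth.txt, and raises FileNotFoundError when no folder contains all three. The following is only a minimal sketch of an implementation that would satisfy these tests, not the project's actual code:

from pathlib import Path

REQUIRED_FILES = ('rgb.txt', 'depth.txt', 'groundtruth.txt')


def find_files(base_root):
    # Breadth-first search of the directory tree for a folder holding all required files.
    # Sketch only: the real tum_loader.find_files may differ in search order and edge cases.
    to_search = [Path(base_root)]
    while to_search:
        candidate = to_search.pop(0)
        if all((candidate / name).is_file() for name in REQUIRED_FILES):
            return (candidate,) + tuple(candidate / name for name in REQUIRED_FILES)
        # Descend into subdirectories; plain files (e.g. decoy.txt) are ignored
        to_search.extend(child for child in candidate.iterdir() if child.is_dir())
    raise FileNotFoundError(
        "Could not find rgb.txt, depth.txt and groundtruth.txt under '{0}'".format(base_root))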
Example #3
    def find_roots(cls, root: typing.Union[str, bytes, PathLike, PurePath]):
        """
        Recursively search for the directories to import from the root folder.
        We're looking for folders with the same names as the
        :param root: The root folder to search. Search is recursive.
        :return:
        """
        actual_roots = {}
        tarball_roots = {}
        to_search = {Path(root).resolve()}
        while len(to_search) > 0:
            candidate_root = to_search.pop()
            for child_path in candidate_root.iterdir():
                if child_path.is_dir():
                    if child_path.name in dataset_names:
                        # this could be a dataset folder, check that it contains the required files
                        try:
                            tum_loader.find_files(child_path)
                        except FileNotFoundError:
                            continue
                        # find_files succeeded, store this path
                        actual_roots[child_path.name] = child_path
                    else:
                        # Recursively search this path for more files
                        to_search.add(child_path)
                elif child_path.is_file() and tarfile.is_tarfile(child_path):
                    # the file is a tarball, check if it matches a dataset name
                    file_name = child_path.name
                    period_index = file_name.find('.')
                    if period_index > 0:
                        file_name = file_name[:period_index]  # strip all extensions.
                    if file_name in dataset_names:
                        tarball_roots[file_name] = child_path.parent / file_name

        # For each dataset where we found a tarball but not a root folder, store the tarball path as the root
        for dataset_name in set(tarball_roots.keys()) - set(actual_roots.keys()):
            actual_roots[dataset_name] = tarball_roots[dataset_name]

        return actual_roots
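
For context, here is a hypothetical usage of find_roots; the class name TUMManager, the folder layout, and the specific dataset names are assumptions for illustration. It maps each known dataset name to the path to import from, preferring an already-extracted sequence folder over a tarball of the same name:

# Assumed layout under /data/tum (illustrative only):
#   rgbd_dataset_freiburg1_xyz/        <- extracted folder containing rgb.txt, depth.txt, groundtruth.txt
#   rgbd_dataset_freiburg2_desk.tgz    <- tarball only; extensions are stripped at the first '.'
roots = TUMManager.find_roots('/data/tum')
# roots would then be roughly:
# {
#     'rgbd_dataset_freiburg1_xyz': Path('/data/tum/rgbd_dataset_freiburg1_xyz'),
#     'rgbd_dataset_freiburg2_desk': Path('/data/tum/rgbd_dataset_freiburg2_desk'),  # tarball path minus extensions
# }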
Example #4
    def test_finds_root_with_required_files(self):
        root_path = Path(self.temp_folder) / 'root'
        root_path.mkdir(parents=True, exist_ok=True)
        for filename in self.required_files:
            (root_path / filename).touch()

        result = tum_loader.find_files(root_path)
        self.assertEqual((
            root_path,
            root_path / 'rgb.txt',
            root_path / 'depth.txt',
            root_path / 'groundtruth.txt',
        ), result)

        # Clean up after ourselves
        shutil.rmtree(root_path)
Example #5
def verify_dataset(image_collection: ImageCollection,
                   root_folder: typing.Union[str, Path],
                   dataset_name: str,
                   repair: bool = False):
    """
    Load a TUM RGB-D sequence into the database.


    :return:
    """
    root_folder = Path(root_folder)
    dataset_name = str(dataset_name)
    repair = bool(repair)
    valid = True
    irreparable = False
    image_group = dataset_name

    # Check the root folder to see if it needs to be extracted from a tarfile
    delete_when_done = None
    if not root_folder.is_dir():
        if (root_folder.parent / dataset_name).is_dir():
            # The root was a tarball, but the extracted data already exists, just use that as the root
            root_folder = root_folder.parent / dataset_name
        else:
            candidate_tar_file = root_folder.parent / (dataset_name + '.tgz')
            if candidate_tar_file.is_file() and tarfile.is_tarfile(candidate_tar_file):
                # Root is actually a tarfile, extract it. find_files will handle the extracted folder structure
                with tarfile.open(candidate_tar_file) as tar_fp:
                    tar_fp.extractall(root_folder.parent / dataset_name)
                root_folder = root_folder.parent / dataset_name
                delete_when_done = root_folder
            else:
                # Could find neither a dir nor a tarfile to extract from
                raise NotADirectoryError(
                    "'{0}' is not a directory".format(root_folder))

    # Check the image group on the image collection
    if image_collection.image_group != image_group:
        if repair:
            image_collection.image_group = image_group
            image_collection.save()
            logging.getLogger(__name__).info(
                f"Fixed incorrect image group for {image_collection.sequence_name}"
            )
        else:
            logging.getLogger(__name__).warning(
                f"{image_collection.sequence_name} has incorrect image group {image_collection.image_group}"
            )
            valid = False

    # Step 1: Find the relevant metadata files
    root_folder, rgb_path, depth_path, trajectory_path = tum_loader.find_files(
        root_folder)

    # Step 2: Read the metadata from them
    image_files = tum_loader.read_image_filenames(rgb_path)
    trajectory = tum_loader.read_trajectory(trajectory_path,
                                            image_files.keys())
    depth_files = tum_loader.read_image_filenames(depth_path)

    # Step 3: Associate the different data types by timestamp
    all_metadata = tum_loader.associate_data(image_files, trajectory,
                                             depth_files)

    # Step 4: Load the images from the metadata
    total_invalid_images = 0
    total_fixed_images = 0
    with arvet.database.image_manager.get().get_group(image_group,
                                                      allow_write=repair):
        for img_idx, (timestamp, image_file, camera_pose,
                      depth_file) in enumerate(all_metadata):
            changed = False
            img_valid = True
            img_path = root_folder / image_file
            depth_path = root_folder / depth_file
            rgb_data = image_utils.read_colour(img_path)
            depth_data = image_utils.read_depth(depth_path)
            depth_data = depth_data / 5000  # Re-scale depth to meters
            img_hash = bytes(xxhash.xxh64(rgb_data).digest())

            # Load the image from the database
            try:
                _, image = image_collection[img_idx]
            except (KeyError, IOError, RuntimeError):
                logging.getLogger(__name__).exception(
                    f"Error loading image object {img_idx}")
                valid = False
                total_invalid_images += 1
                continue

            # First, check the image group
            if image.image_group != image_group:
                if repair:
                    image.image_group = image_group
                    changed = True
                logging.getLogger(__name__).warning(
                    f"Image {img_idx} has incorrect group {image.image_group}")
                valid = False
                img_valid = False

            # Load the pixels from the image
            try:
                actual_pixels = image.pixels
            except (KeyError, IOError, RuntimeError):
                actual_pixels = None
            try:
                actual_depth = image.depth
            except (KeyError, IOError, RuntimeError):
                actual_depth = None

            # Compare the loaded image data to the data read from disk
            if actual_pixels is None or not np.array_equal(
                    rgb_data, actual_pixels):
                if repair:
                    image.store_pixels(rgb_data)
                    changed = True
                else:
                    logging.getLogger(__name__).error(
                        f"Image {img_idx}: Pixels do not match data read from {img_path}"
                    )
                valid = False
                img_valid = False
            if img_hash != bytes(image.metadata.img_hash):
                if repair:
                    image.metadata.img_hash = img_hash
                    changed = True
                else:
                    logging.getLogger(__name__).error(
                        f"Image {img_idx}: Image hash does not match metadata")
                valid = False
                img_valid = False
            if actual_depth is None or not np.array_equal(
                    depth_data, actual_depth):
                if repair:
                    image.store_depth(depth_data)
                    changed = True
                else:
                    logging.getLogger(__name__).error(
                        f"Image {img_idx}: Depth does not match data read from {depth_path}"
                    )
                valid = False
                img_valid = False
            if changed and repair:
                logging.getLogger(__name__).warning(
                    f"Image {img_idx}: repaired")
                image.save()
                total_fixed_images += 1
            if not img_valid:
                total_invalid_images += 1

    if irreparable:
        # Images are missing entirely, needs re-import
        logging.getLogger(__name__).error(
            f"Image Collection {image_collection.pk} for sequence {dataset_name} "
            "is IRREPARABLE, invalidate and re-import")
    elif repair:
        # Re-save the modified image collection
        logging.getLogger(__name__).info(
            f"{image_collection.sequence_name} repaired successfully "
            f"({total_fixed_images} image files fixed).")
    elif valid:
        logging.getLogger(__name__).info(
            f"Verification of {image_collection.sequence_name} successful.")
    else:
        logging.getLogger(__name__).error(
            f"Verification of {image_collection.sequence_name} ({image_collection.pk}) "
            f"FAILED, ({total_invalid_images} images failed)")

    if delete_when_done is not None and delete_when_done.exists():
        # We're done and need to clean up after ourselves
        shutil.rmtree(delete_when_done)

    return valid
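
A hedged usage sketch of this verifier follows; how the ImageCollection is fetched and the sequence name are assumptions, not part of the example above:

import logging

logging.basicConfig(level=logging.INFO)

image_collection = load_image_collection_somehow()  # hypothetical placeholder, not a real API
ok = verify_dataset(
    image_collection,
    root_folder='/data/tum/rgbd_dataset_freiburg1_xyz',  # extracted folder, or the path its tarball sits beside
    dataset_name='rgbd_dataset_freiburg1_xyz',
    repair=True,  # rewrite mismatched pixels, depth, and hashes instead of only logging them
)
if not ok:
    logging.getLogger(__name__).error("Sequence did not match the data on disk")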
Example #6
    def verify_dataset(self, name: str, repair: bool = False):
        if name in self._full_paths:
            import_dataset_task = task_manager.get_import_dataset_task(
                module_name=tum_loader.__name__,
                path=str(self._full_paths[name]),
                additional_args={'dataset_name': name})
            if import_dataset_task.is_finished:
                image_collection = import_dataset_task.get_result()
                return tum_validator.verify_dataset(
                    image_collection, self._full_paths[name], name, repair)
            else:
                # Try looking for an existing tarfile
                for candidate_path in [
                        str(self._full_paths[name]) + '.tar.gz',
                        str(self._full_paths[name]) + '.tgz',
                ]:
                    import_dataset_task = task_manager.get_import_dataset_task(
                        module_name=tum_loader.__name__,
                        path=candidate_path,
                        additional_args={'dataset_name': name})
                    if import_dataset_task.is_finished:
                        break
                if import_dataset_task.is_finished:
                    if repair:
                        logging.getLogger(__name__).warning(
                            f"Removed suffix from tarball import task for {name}, it should get returned next time"
                        )
                        import_dataset_task.path = self._full_paths[name]
                        import_dataset_task.save()
                    image_collection = import_dataset_task.get_result()
                    return tum_validator.verify_dataset(
                        image_collection, self._full_paths[name], name, repair)
                else:
                    # Try looking for an existing task with the actual root from find_files as the path
                    try:
                        actual_root = tum_loader.find_files(self._full_paths[name])
                    except FileNotFoundError:
                        actual_root = None
                    if actual_root is not None and len(actual_root) > 0:
                        import_dataset_task = task_manager.get_import_dataset_task(
                            module_name=tum_loader.__name__,
                            path=actual_root[0],
                            additional_args={'dataset_name': name})
                    else:
                        import_dataset_task = None
                    if import_dataset_task is not None and import_dataset_task.is_finished:
                        if repair:
                            logging.getLogger(__name__).warning(
                                f"Shortened path for {name}, it should get returned next time"
                            )
                            import_dataset_task.path = self._full_paths[name]
                            import_dataset_task.save()
                        image_collection = import_dataset_task.get_result()
                        return tum_validator.verify_dataset(
                            image_collection, self._full_paths[name], name, repair)
                    else:
                        logging.getLogger(__name__).warning(
                            f"Cannot validate {name}, it is not loaded yet? "
                            f"(looking for module name \"{tum_loader.__name__}\", "
                            f"path \"{str(self._full_paths[name])}\", "
                            f"additional args \"{ {'dataset_name': name} }\")")
                        return True
        raise NotADirectoryError(
            "No root folder for {0}, did you download it?".format(name))