Example #1
def validate_dataset(md_path: Path, log: logging.Logger) -> bool:
    """Check every NetCDF/GeoTIFF file belonging to the dataset at the given metadata path."""
    base_path, all_files = paths.get_dataset_paths(md_path)

    for file in all_files:
        if file.suffix.lower() in ('.nc', '.tif'):
            if not validate_image(file, log):
                return False
    return True
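A minimal invocation sketch (the metadata path is hypothetical; `paths` and `validate_image` are assumed to be provided by the surrounding module):

import logging
from pathlib import Path

log = logging.getLogger("validate")
ok = validate_dataset(Path("/data/LS8_SOME_SCENE_1/ga-metadata.yaml"), log)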
Example #2
    def dataset_folder_path(dataset_path):
        # Get the dataset's parent folder (typically the "x_y" folder for tiles, or the month folder for scenes).

        # Get the base path for the dataset.
        # e.g. "LS8_SOME_SCENE_1/ga-metadata.yaml" to "LS8_SOME_SCENE_1"
        #  or "LS7_SOME_TILE.nc" to itself
        base_path, _ = get_dataset_paths(dataset_path)

        return base_path.parent
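To illustrate the comment above with plain pathlib (hypothetical paths; assumes get_dataset_paths resolves a scene's metadata file to its enclosing scene folder, per the e.g. lines in the comment):

from pathlib import Path

scene_md = Path("/data/2016-06/LS8_SOME_SCENE_1/ga-metadata.yaml")
scene_base = scene_md.parent        # the scene folder, as get_dataset_paths would return
print(scene_base.parent)            # /data/2016-06  (the month folder)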
Example #3
    def copyable_path(self):
        """Get the path containing the whole dataset that can be copied on disk.

        The recorded self.path of datasets is the path to the metadata, but "packaged" datasets
        such as scenes have a folder hierarchy, and to copy them we want to copy the whole scene
        folder, not just the metadata file.

        (This will return a folder for a scene, and will be identical to self.path for typical NetCDFs)
        """
        package_path, _ = paths.get_dataset_paths(self.path)
        return package_path
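Why this matters when copying: a sketch (`dataset` and `dest` are hypothetical) showing how a caller can treat both cases uniformly:

import shutil
from pathlib import Path

dest = Path("/backup")                      # hypothetical destination
src = dataset.copyable_path()               # `dataset` is a hypothetical record object
if src.is_dir():
    shutil.copytree(src, dest / src.name)   # whole scene folder
else:
    shutil.copy2(src, dest / src.name)      # single NetCDF file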
Example #4
    def _compute_paths(source_metadata_path, destination_base_path):
        """Compute the current dataset path, plus the new dataset and metadata locations under the destination base."""
        dataset_path, all_files = get_dataset_paths(source_metadata_path)
        _, dataset_offset = split_path_from_base(dataset_path)
        new_dataset_location = destination_base_path.joinpath(dataset_offset)
        _, metadata_offset = split_path_from_base(source_metadata_path)
        new_metadata_location = destination_base_path.joinpath(metadata_offset)

        # We currently assume all files are contained in the dataset directory,
        # as the single dataset path is written atomically.
        if not all(str(f).startswith(str(dataset_path)) for f in all_files):
            raise NotImplementedError("Some dataset files are not contained in the dataset path. "
                                      "Situation not yet implemented. %s" % dataset_path)

        return dataset_path, new_dataset_location, new_metadata_location
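The offset arithmetic can be mimicked with plain pathlib (hypothetical paths; assumes split_path_from_base splits a registered base directory from the remainder, which is what the code above relies on):

from pathlib import Path

base = Path("/g/data/rs0")                  # hypothetical registered base
dataset_path = base / "scenes/2016-06/LS8_SOME_SCENE_1"
offset = dataset_path.relative_to(base)     # scenes/2016-06/LS8_SOME_SCENE_1

destination_base = Path("/g/data/v10")      # hypothetical destination base
print(destination_base / offset)            # /g/data/v10/scenes/2016-06/LS8_SOME_SCENE_1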
Example #5
def _verify_checksum(log, metadata_path, dry_run=True):
    """Verify the dataset against its package checksum file (None if the file is missing)."""
    dataset_path, all_files = path_utils.get_dataset_paths(metadata_path)
    checksum_file = _expected_checksum_path(dataset_path)
    if not checksum_file.exists():
        # Ingested data doesn't currently have them, so it's only a warning.
        log.warning("checksum.missing", checksum_file=checksum_file)
        return None

    ch = verify.PackageChecksum()
    ch.read(checksum_file)
    # A dry run reads the checksum file but skips the per-file verification.
    if not dry_run:
        for file, successful in ch.iteratively_verify():
            if successful:
                log.debug("checksum.pass", file=file)
            else:
                log.error("checksum.failure", file=file)
                return False

    log.debug("copy.verify", file_count=len(all_files))
    return True
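Note the tri-state return: None when the checksum file is missing, False on a failed checksum, True otherwise. A caller sketch (hypothetical; the `is False` test keeps the missing-checksum case from being treated as a failure):

result = _verify_checksum(log, metadata_path, dry_run=False)
if result is False:
    raise RuntimeError("Checksum verification failed: %s" % metadata_path)
# result is None (no checksum file) or True (verified): safe to continue.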