def validate_dataset(md_path: Path, log: logging.Logger):
    base_path, all_files = paths.get_dataset_paths(md_path)
    for file in all_files:
        if file.suffix.lower() in ('.nc', '.tif'):
            if not validate_image(file, log):
                return False
    return True
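# Hypothetical usage sketch (not part of the original code): walk a collection and
# validate every dataset whose metadata document is found. The "ga-metadata.yaml"
# filename follows the example paths used elsewhere in this file; the driver itself
# is an assumption, not the project's actual entry point.
def validate_all_datasets(collection_root: Path, log: logging.Logger) -> bool:
    all_valid = True
    for md_path in collection_root.rglob('ga-metadata.yaml'):
        if not validate_dataset(md_path, log):
            log.error("Dataset failed validation: %s", md_path)
            all_valid = False
    return all_valid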
def dataset_folder_path(dataset_path):
    # Return the dataset's parent folder: typically the "x_y" folder for tiles,
    # or the month folder for scenes.

    # get_dataset_paths() gives us the base path for the dataset,
    # eg. "LS8_SOME_SCENE_1/ga-metadata.yaml" -> "LS8_SOME_SCENE_1",
    # or "LS7_SOME_TILE.nc" -> itself.
    base_path, _ = get_dataset_paths(dataset_path)
    return base_path.parent
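# Illustrative expectations for dataset_folder_path(), using hypothetical paths
# (the real result depends on what get_dataset_paths() resolves on disk):
#
#   /archive/ls8/2016-06/LS8_SOME_SCENE_1/ga-metadata.yaml  ->  /archive/ls8/2016-06
#   /archive/ls7_tiles/-12_34/LS7_SOME_TILE.nc              ->  /archive/ls7_tiles/-12_34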
def copyable_path(self):
    """
    Get the path containing the whole dataset that can be copied on disk.

    The recorded self.path of datasets is the path to the metadata, but "packaged"
    datasets such as scenes have a folder hierarchy, and to copy them we want to
    copy the whole scene folder, not just the metadata file.

    (This will return a folder for a scene, and will be identical to self.path
    for typical NetCDFs)
    """
    package_path, _ = paths.get_dataset_paths(self.path)
    return package_path
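# Hypothetical caller sketch: copying a whole dataset package to another directory.
# `dataset` is any object exposing copyable_path(); the destination handling here is
# illustrative only, not the project's actual copy logic.
import shutil

def copy_dataset_package(dataset, destination_dir: Path) -> Path:
    source = dataset.copyable_path()
    target = destination_dir / source.name
    if source.is_dir():
        # Packaged scene: copy the whole folder hierarchy.
        shutil.copytree(source, target)
    else:
        # Single-file dataset (eg. a typical NetCDF): copy just the file.
        shutil.copy2(source, target)
    return target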
def _compute_paths(source_metadata_path, destination_base_path):
    dataset_path, all_files = get_dataset_paths(source_metadata_path)

    _, dataset_offset = split_path_from_base(dataset_path)
    new_dataset_location = destination_base_path.joinpath(dataset_offset)

    _, metadata_offset = split_path_from_base(source_metadata_path)
    new_metadata_location = destination_base_path.joinpath(metadata_offset)

    # We currently assume all files are contained in the dataset directory/path:
    # we write the single dataset path atomically.
    if not all(str(f).startswith(str(dataset_path)) for f in all_files):
        raise NotImplementedError(
            "Some dataset files are not contained in the dataset path. "
            "Situation not yet implemented. %s" % dataset_path
        )

    return dataset_path, new_dataset_location, new_metadata_location
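# Illustrative expectation (hypothetical paths; the offsets depend on which base path
# split_path_from_base() detects). If '/g/data/ls8' were the detected base, then:
#
#   _compute_paths(Path('/g/data/ls8/2016-06/LS8_SOME_SCENE_1/ga-metadata.yaml'),
#                  Path('/destination/ls8'))
#
# would be expected to return:
#
#   (Path('/g/data/ls8/2016-06/LS8_SOME_SCENE_1'),
#    Path('/destination/ls8/2016-06/LS8_SOME_SCENE_1'),
#    Path('/destination/ls8/2016-06/LS8_SOME_SCENE_1/ga-metadata.yaml'))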
def _verify_checksum(log, metadata_path, dry_run=True):
    dataset_path, all_files = path_utils.get_dataset_paths(metadata_path)
    checksum_file = _expected_checksum_path(dataset_path)
    if not checksum_file.exists():
        # Ingested data doesn't currently have them, so it's only a warning.
        log.warning("checksum.missing", checksum_file=checksum_file)
        return None

    ch = verify.PackageChecksum()
    ch.read(checksum_file)
    if not dry_run:
        for file, successful in ch.iteratively_verify():
            if successful:
                log.debug("checksum.pass", file=file)
            else:
                log.error("checksum.failure", file=file)
                return False

    log.debug("copy.verify", file_count=len(all_files))
    return True
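# Assumed shape of the _expected_checksum_path() helper referenced above: the checksum
# manifest is expected to sit alongside the dataset. The 'package.sha1' filename and
# the '.sha1' suffix are guesses for illustration, not confirmed by this code.
def _expected_checksum_path(dataset_path: Path) -> Path:
    if dataset_path.is_dir():
        # Packaged dataset (eg. a scene folder): manifest inside the package.
        return dataset_path / 'package.sha1'
    # Single-file dataset: manifest next to the file.
    return dataset_path.parent / (dataset_path.name + '.sha1')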