Example 1
def _verify_checksum(log, metadata_path, dry_run=True):
    dataset_path, all_files = path_utils.get_dataset_paths(metadata_path)
    checksum_file = _expected_checksum_path(dataset_path)
    if not checksum_file.exists():
        # Ingested data doesn't currently include checksum files, so a missing one is only a warning.
        log.warning("checksum.missing", checksum_file=checksum_file)
        return None

    ch = verify.PackageChecksum()
    ch.read(checksum_file)
    if not dry_run:
        # Skipped on dry runs: re-hash every listed file and fail fast on a mismatch.
        for file, successful in ch.iteratively_verify():
            if successful:
                log.debug("checksum.pass", file=file)
            else:
                log.error("checksum.failure", file=file)
                return False

    log.debug("copy.verify", file_count=len(all_files))
    return True
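
The read-and-verify half of the PackageChecksum API used above can be exercised on its own. A minimal sketch, assuming the same eodatasets-style verify module (read() loads a stored checksum file; iteratively_verify() re-hashes each listed file and yields (path, ok) pairs, as in the loop above); the import location and the checksum path below are assumptions, not taken from this example:

from pathlib import Path

from eodatasets import verify  # assumed import location for PackageChecksum


def verify_package(checksum_file):
    """Return True only if every file listed in checksum_file verifies."""
    ch = verify.PackageChecksum()
    ch.read(checksum_file)  # load the expected hashes from disk
    ok = True
    for path, successful in ch.iteratively_verify():  # re-hash each listed file
        if not successful:
            print('checksum mismatch: %s' % path)
            ok = False
    return ok


# Hypothetical path; point this at a real package's checksum file.
verify_package(Path('/data/packages/example-dataset/package.sha1'))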

Example 2
def prepare_dataset(base_path, write_checksum=True):
    # type: (Path, bool) -> Optional[Dict]
    mtl_doc, mtl_filename = get_mtl_content(base_path)

    if not mtl_doc:
        return None

    additional = {}
    if write_checksum:
        checksum_path = _checksum_path(base_path)
        if checksum_path.exists():
            logging.warning("Checksum path exists. Not touching it. %r", checksum_path)
        else:
            checksum = verify.PackageChecksum()
            checksum.add_file(base_path)
            checksum.write(checksum_path)
        additional['checksum_sha1'] = str(relative_path(base_path, checksum_path))

    return prepare_dataset_from_mtl(
        _file_size_bytes(base_path),
        mtl_doc,
        mtl_filename,
        additional_props=additional
    )
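
prepare_dataset demonstrates the write half: hash the dataset tree once, persist the result, and leave any existing checksum file alone. A standalone sketch of just that step, assuming the same verify.PackageChecksum API (add_file() accepts a directory and hashes the files under it, as it is handed the dataset folder above); the checksum file name and the import location are placeholders:

from pathlib import Path

from eodatasets import verify  # assumed import location


def write_checksum_file(dataset_dir):
    """Hash everything under dataset_dir and write a checksum file beside it."""
    checksum_path = dataset_dir / 'package.sha1'  # placeholder file name
    if checksum_path.exists():
        # Mirror prepare_dataset above: never overwrite an existing checksum file.
        return checksum_path
    checksum = verify.PackageChecksum()
    checksum.add_file(dataset_dir)  # given a directory, hashes the files under it
    checksum.write(checksum_path)
    return checksum_path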

Example 3
def package_dataset(dataset_driver,
                    dataset,
                    image_path,
                    target_path,
                    hard_link=False,
                    additional_files=None):
    """
    Package the given dataset folder.

    This includes copying the dataset into a folder, generating
    metadata and checksum files, as well as optionally generating
    a browse image.

    Validates, and *modifies*, the passed-in dataset, adding extra metadata.

    :type hard_link: bool
    :type dataset_driver: eodatasets.drivers.DatasetDriver
    :type dataset: ptype.DatasetMetadata
    :type image_path: Path
    :type target_path: Path
    :param additional_files: Additional files to record in the package.
    :type additional_files: tuple[Path] or None

    :raises IncompletePackage: If not enough metadata can be extracted from the dataset.
    :return: The generated GA Dataset ID (ga_label)
    :rtype: str
    """
    if additional_files is None:
        additional_files = []
    _check_additional_files_exist(additional_files)

    dataset_driver.fill_metadata(dataset,
                                 image_path,
                                 additional_files=additional_files)

    checksums = verify.PackageChecksum()

    target_path = target_path.absolute()
    image_path = image_path.absolute()

    target_metadata_path = documents.find_metadata_path(target_path)
    if target_metadata_path is not None and target_metadata_path.exists():
        _LOG.info('Already packaged? Skipping %s', target_path)
        return None

    _LOG.debug('Packaging %r -> %r', image_path, target_path)
    package_directory = target_path.joinpath('product')

    file_paths = []

    def save_target_checksums_and_paths(source_path, target_paths):
        _LOG.debug('%r -> %r', source_path, target_paths)
        checksums.add_files(target_paths)
        file_paths.extend(target_paths)

    prepare_target_imagery(image_path,
                           destination_directory=package_directory,
                           include_path=dataset_driver.include_file,
                           translate_path=partial(
                               dataset_driver.translate_path, dataset),
                           after_file_copy=save_target_checksums_and_paths,
                           hard_link=hard_link)

    write_additional_files(additional_files, checksums, target_path)

    validate_metadata(dataset)
    dataset = expand_driver_metadata(dataset_driver, dataset, file_paths)

    #: :type: ptype.DatasetMetadata
    dataset = ptype.rebase_paths(image_path, package_directory, dataset)

    create_dataset_browse_images(dataset_driver,
                                 dataset,
                                 target_path,
                                 after_file_creation=checksums.add_file)

    target_checksums_path = target_path / GA_CHECKSUMS_FILE_NAME
    dataset.checksum_path = target_checksums_path

    target_metadata_path = serialise.write_dataset_metadata(
        target_path, dataset)

    checksums.add_file(target_metadata_path)
    checksums.write(target_checksums_path)

    return dataset.ga_label
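
package_dataset orders its checksum work deliberately: files are hashed as they are copied, browse images and the metadata document are added next, and the checksum file itself is written last so that it covers everything else in the package. A hedged call sketch follows; the module layout, the driver registry, the dataset construction, and every path below are assumptions rather than confirmed API:

from pathlib import Path

from eodatasets import drivers, package
from eodatasets import type as ptype

# All placeholders: a real caller picks a concrete DatasetDriver and builds
# the dataset metadata from the source imagery.
driver = drivers.PACKAGE_DRIVERS['ortho']  # assumed driver registry and key
dataset = ptype.DatasetMetadata()

ga_label = package.package_dataset(
    dataset_driver=driver,
    dataset=dataset,
    image_path=Path('/input/scenes/example-scene'),
    target_path=Path('/output/packages/example-scene'),
    hard_link=False,
)
print(ga_label)  # generated GA label, or None if the target was already packaged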