def _test_checksums(self):
    """Check that every recorded download url has a registered checksum.

    Does nothing when `self._download_urls` is empty. Otherwise the url infos
    are read from `self.DATASET_CLASS.url_infos`; when that is `None`, the
    legacy checksums file registered under `self.builder.name` is loaded
    instead (if it exists). Urls without an entry are reported through
    `self.assertEmpty`.

    Raises:
        FileNotFoundError: If url infos are found neither on the dataset class
            nor in the legacy checksums location.
    """
    # If no call to `dl_manager.download`, then no need to check url presence.
    if not self._download_urls:
        return

    # The url is terminated by a newline so that the following sentence does
    # not get glued to the link when the message is printed or copied.
    err_msg = (
        "Did you forget to record checksums with `--register_checksums` ? See "
        "instructions at: "
        "https://www.tensorflow.org/datasets/add_dataset#run_the_generation_code\n"
        "If you want to opt-out of checksums validation, please add "
        "`SKIP_CHECKSUMS = True` to the `DatasetBuilderTestCase`.\n")
    url_infos = self.DATASET_CLASS.url_infos
    filepath = self.DATASET_CLASS._checksums_path  # pylint: disable=protected-access
    # Legacy checksums: Search in `checksums/` dir
    if url_infos is None:
        legacy_paths = checksums._checksum_paths()  # pylint: disable=protected-access
        legacy_filepath = legacy_paths.get(self.builder.name)
        if legacy_filepath and legacy_filepath.exists():
            # Report the legacy location in any later failure message.
            filepath = legacy_filepath
            url_infos = checksums.load_url_infos(filepath)
    # Checksums present neither in legacy nor in package
    if url_infos is None:
        raise FileNotFoundError(
            f"Checksums file not found at: {filepath}\n"
            f"{err_msg}\n")

    missing_urls = self._download_urls - set(url_infos)
    self.assertEmpty(
        missing_urls,
        f"Some urls checksums are missing at: {filepath}\n{err_msg}")
# Example no. 2
# 0
def _collect_path_to_url_infos(
) -> Dict[tfds.core.ReadWritePath, Dict[Url, checksums.UrlInfo]]:
    """Map each known checksums file to the url infos loaded from it.

    The paths come from two places: the legacy checksums registry, and the
    `_checksums_path` of every builder returned by `tfds.list_builders()`
    whose file exists.

    Returns:
        A dict keyed by the write-path of each checksums file, whose values are
        the result of `checksums.load_url_infos` for that path.
    """
    # Start from the legacy checksums paths.
    legacy_paths = checksums._checksum_paths().values()  # pylint: disable=protected-access
    all_paths = list(legacy_paths)

    # Then add the dataset-as-folder checksums files that are actually present.
    builder_paths = (
        tfds.builder_cls(builder_name)._checksums_path  # pylint: disable=protected-access
        for builder_name in tfds.list_builders()
    )
    all_paths.extend(p for p in builder_paths if p.exists())

    # Convert every path before loading anything, as the loading step expects
    # the converted paths.
    write_paths = list(map(tfds.core.utils.to_write_path, all_paths))

    path_to_url_infos = {}
    for write_path in write_paths:
        loaded = checksums.load_url_infos(write_path)
        path_to_url_infos[write_path] = typing.cast(
            Dict[Url, checksums.UrlInfo], loaded)
    return path_to_url_infos