예제 #1
0
    def _test_checksums(self):
        # If no call to `dl_manager.download`, then no need to check url presence.
        if not self._download_urls:
            return

        err_msg = (
            "Did you forget to record checksums with `--register_checksums` ? See "
            "instructions at: "
            "https://www.tensorflow.org/datasets/add_dataset#run_the_generation_codeIf"
            " want to opt-out of checksums validation, please add `SKIP_CHECKSUMS "
            "= True` to the `DatasetBuilderTestCase`.\n")
        url_infos = self.DATASET_CLASS.url_infos
        filepath = self.DATASET_CLASS._checksums_path  # pylint: disable=protected-access
        # Legacy checksums: Search in `checksums/` dir
        if url_infos is None:
            legacy_filepath = checksums._checksum_paths().get(
                self.builder.name)  # pylint: disable=protected-access
            if legacy_filepath and legacy_filepath.exists():
                filepath = legacy_filepath
                url_infos = checksums.load_url_infos(filepath)
        # Checksums not present neither in legacy nor package
        if url_infos is None:
            raise FileNotFoundError(
                f"Checksums file not found at: {filepath}\n"
                f"{err_msg}\n")

        missing_urls = self._download_urls - set(url_infos.keys())
        self.assertEmpty(
            missing_urls,
            f"Some urls checksums are missing at: {filepath}\n{err_msg}")
예제 #2
0
def _collect_path_to_url_infos(
) -> Dict[tfds.core.ReadWritePath, Dict[Url, checksums.UrlInfo]]:
    """Map each known checksums file to the url infos loaded from it.

    Paths come from two places: the legacy checksums registry and the
    `_checksums_path` of every builder listed by `tfds.list_builders()` whose
    file exists.

    Returns:
      A dict keyed by write path, whose values are the result of
      `checksums.load_url_infos` for that path.
    """
    # Legacy checksums files.
    candidate_paths = list(checksums._checksum_paths().values())  # pylint: disable=protected-access

    # Dataset-as-folder checksums files; keep only the ones present on disk.
    builder_paths = (
        tfds.builder_cls(builder_name)._checksums_path  # pylint: disable=protected-access
        for builder_name in tfds.list_builders()
    )
    candidate_paths.extend(p for p in builder_paths if p.exists())

    # Convert every path before loading anything.
    write_paths = [
        tfds.core.utils.to_write_path(candidate)
        for candidate in candidate_paths
    ]

    path_to_url_infos = {}
    for write_path in write_paths:
        loaded = checksums.load_url_infos(write_path)
        path_to_url_infos[write_path] = typing.cast(
            Dict[Url, checksums.UrlInfo], loaded)
    return path_to_url_infos
예제 #3
0
def test_checksums(tmp_path: pathlib.Path):
  """Url infos written with `save_url_infos` load back unchanged."""
  # Both entries share checksum and size; only the URL and filename differ.
  expected_url_infos = {
      url: checksums.UrlInfo(checksum='abcd', size=1234, filename=filename)
      for url, filename in (
          ('http://abc.org/data', 'a.zip'),
          ('http://edf.org/data', 'b.zip'),
      )
  }
  tsv_path = tmp_path / 'checksums.tsv'

  checksums.save_url_infos(tsv_path, expected_url_infos)
  assert checksums.load_url_infos(tsv_path) == expected_url_infos
예제 #4
0
 def download_checksums(self, checksums_url):
     """Download a checksums file and merge its entries into `self._url_infos`.

     Args:
       checksums_url: Location of the checksums file, passed to
         `self.download`.
     """
     local_path = self.download(checksums_url)
     registry = self._url_infos
     registry.update(checksums.load_url_infos(local_path))