コード例 #1
0
def _validate_checksums(
    url: str,
    path: epath.Path,
    computed_url_info: Optional[checksums.UrlInfo],
    expected_url_info: Optional[checksums.UrlInfo],
    force_checksums_validation: bool,
) -> None:
    """Checks that the computed url info agrees with the expected one.

    Args:
        url: Url of the artifact; only used to build error messages.
        path: Location of the downloaded file. It is hashed when validation
            is forced and `computed_url_info` was not provided.
        computed_url_info: Url info computed for the file, if available.
        expected_url_info: Url info registered for `url`, if available.
        force_checksums_validation: When true, a missing
            `expected_url_info` is treated as an error and a missing
            `computed_url_info` is computed from `path`.

    Raises:
        ValueError: If validation is forced but `expected_url_info` is
            missing.
        NonMatchingChecksumError: If both url infos are available and
            they differ.
    """
    if force_checksums_validation:
        # The file may not have been hashed yet (e.g. manually downloaded
        # file), so compute its url info now.
        if not computed_url_info:
            computed_url_info = checksums.compute_url_info(path)
        # Forced validation is impossible without registered checksums.
        if not expected_url_info:
            raise ValueError(
                f'Missing checksums url: {url}, yet '
                '`force_checksums_validation=True`. '
                'Did you forget to register checksums?'
            )

    # Nothing to compare unless both sides are known.
    if not (expected_url_info and computed_url_info):
        return

    if expected_url_info != computed_url_info:
        report_lines = [
            f'Artifact {url}, downloaded to {path}, has wrong checksum:',
            f'* Expected: {expected_url_info}',
            f'* Got: {computed_url_info}',
            'To debug, see: '
            'https://www.tensorflow.org/datasets/overview#fixing_nonmatchingchecksumerror',
        ]
        raise NonMatchingChecksumError('\n'.join(report_lines))
コード例 #2
0
def _compute_dir_hash(path: utils.ReadOnlyPath) -> str:
  """Returns a deterministic SHA-256 hex digest for a flat directory.

  The digest is taken over the sorted entries' names followed by the
  checksum of each entry, all concatenated into a single string.

  Args:
    path: Directory to hash. It must contain files only.

  Returns:
    The hexadecimal SHA-256 digest of the concatenated names and checksums.

  Raises:
    ValueError: If the directory contains a sub-directory.
  """
  # Sorting makes the result independent of the listing order.
  entries = sorted(path.iterdir())

  for entry in entries:
    if entry.is_dir():
      raise ValueError('Installed package should only contains files.')

  # All names come first, then all per-file checksums, in the same order.
  names = [entry.name for entry in entries]
  file_checksums = [
      checksums.compute_url_info(entry).checksum for entry in entries
  ]
  payload = ''.join(names + file_checksums)
  return hashlib.sha256(payload.encode()).hexdigest()
コード例 #3
0
def test_compute_url_info():
  """Checks `compute_url_info` against known values for `6pixels.png`."""
  data_root = utils.tfds_path()
  png_path = data_root / 'testing/test_data/6pixels.png'

  expected = checksums.UrlInfo(
      checksum=(
          '04f38ebed34d3b027d2683193766155912fba647158c583c3bdb4597ad8af34c'
      ),
      size=utils.Size(102),
      filename='6pixels.png',
  )
  actual = checksums.compute_url_info(png_path, checksum_cls=hashlib.sha256)

  assert actual == expected
  # NOTE(review): filename is asserted separately, presumably because
  # `UrlInfo` equality does not cover it -- confirm against `UrlInfo`.
  assert actual.filename == expected.filename
コード例 #4
0
 def _sync_file_copy(
     self,
     filepath: str,
     destination_path: str,
 ) -> DownloadResult:
     """Copies a file with the `tf.io.gfile` API and reports its url info.

     Args:
         filepath: Path of the file to copy.
         destination_path: Directory in which the copy is created; the
             copy keeps the base name of `filepath`.

     Returns:
         A `DownloadResult` holding the path of the copy and the url info
         computed for it.
     """
     target = os.path.join(destination_path, os.path.basename(filepath))
     tf.io.gfile.copy(filepath, target)

     # Url info is computed from the copied file, not from the source.
     info = checksums_lib.compute_url_info(
         target,
         checksum_cls=self._checksumer_cls,
     )

     # The size is only known after the copy, so the progress bar total and
     # its progress are both advanced by the full size at once.
     copied_size = info.size
     self._pbar_dl_size.update_total(copied_size)
     self._pbar_dl_size.update(copied_size)
     self._pbar_url.update(1)

     return DownloadResult(path=epath.Path(target), url_info=info)