コード例 #1
0
 def _sync_ftp_download(self, url, destination_path):
     """Retrieve `url` into `destination_path` and digest the result.

     The output file is named by `download_util.get_file_name(url)` and is
     written with `urllib.request.urlretrieve`.

     Args:
       url: Address of the file to retrieve.
       destination_path: Directory in which the file is written.

     Returns:
       A `(hexdigest, size)` pair as produced by
       `utils.read_checksum_digest` for the written file, using
       `self._checksumer` as the checksum class.
     """
     file_name = download_util.get_file_name(url)
     target = os.path.join(destination_path, file_name)
     urllib.request.urlretrieve(url, target)
     digest, num_bytes = utils.read_checksum_digest(
         target, checksum_cls=self._checksumer)
     return digest, num_bytes
コード例 #2
0
    def _download(self, resource):
        """Fetch `resource`, reusing an existing local copy when allowed.

        Args:
            resource: Either a URL string (wrapped into a
                `resource_lib.Resource`) or an object exposing `url`, `path`,
                `fname` and `exists_locally()`.

        Returns:
            A promise. When the local copy is reused it resolves to
            `resource.path`; otherwise it resolves to whatever
            `self._handle_download_result` returns once the download is done.
        """
        if isinstance(resource, six.string_types):
            resource = resource_lib.Resource(url=resource)
        # Attach the registered checksum (None when the URL is unknown).
        resource.sha256 = self._checksums.get(resource.url, None)
        if not resource.path:
            resource.path = os.path.join(self._download_dir, resource.fname)

        if self._force_download or not resource.exists_locally():
            # Unlike the extractor, which owns its temporary directory, the
            # downloader's temporary directory is created here by the
            # DownloadManager.
            staging_dir = '%s.tmp.%s' % (resource.path, uuid.uuid4().hex)
            tf.io.gfile.makedirs(staging_dir)
            logging.info('Downloading %s into %s...', resource.url,
                         staging_dir)

            def _on_downloaded(digest_and_size):
                # The downloader resolves with a (checksum, size) pair.
                digest, num_bytes = digest_and_size
                return self._handle_download_result(resource, staging_dir,
                                                    digest, num_bytes)

            pending = self._downloader.download(resource, staging_dir)
            return pending.then(_on_downloaded)

        # A local copy exists and re-downloading was not requested.
        logging.info('URL %s already downloaded: reusing %s.',
                     resource.url, resource.path)
        if self._record_checksum_size:
            logging.info('Reading checksum and size of %s ...',
                         resource.path)
            digest, num_bytes = utils.read_checksum_digest(resource.path)
            self._handle_download_result(resource,
                                         None,
                                         digest,
                                         num_bytes,
                                         existing=True)
        return promise.Promise.resolve(resource.path)
コード例 #3
0
 def _sync_file_copy(self, filepath: str,
                     destination_path: str) -> checksums_lib.UrlInfo:
     """Copy `filepath` into `destination_path` and describe the copy.

     Args:
       filepath: Path of the file to copy.
       destination_path: Directory receiving the copy; the base name of
         `filepath` is kept.

     Returns:
       A `checksums_lib.UrlInfo` holding the checksum and size that
       `utils.read_checksum_digest` reports for the copied file, using
       `self._checksumer_cls` as the checksum class.
     """
     base_name = os.path.basename(filepath)
     copied_path = os.path.join(destination_path, base_name)
     tf.io.gfile.copy(filepath, copied_path)
     digest, num_bytes = utils.read_checksum_digest(
         copied_path, checksum_cls=self._checksumer_cls)
     return checksums_lib.UrlInfo(checksum=digest, size=num_bytes)
コード例 #4
0
def _validate_checksums(
    url: str,
    path: ReadOnlyPath,
    computed_url_info: Optional[checksums.UrlInfo],
    expected_url_info: Optional[checksums.UrlInfo],
    force_checksums_validation: bool,
) -> None:
  """Validate that `computed_url_info` matches `expected_url_info`.

  Args:
    url: Url the artifact was fetched from (used in error messages).
    path: Location of the downloaded artifact.
    computed_url_info: Info computed for the downloaded file, or a falsy
      value when it is unknown (e.g. manually downloaded file).
    expected_url_info: Registered info for `url`, or a falsy value when no
      checksum has been registered.
    force_checksums_validation: If True, a missing `computed_url_info` is
      read from `path` and a missing `expected_url_info` is an error.

  Raises:
    ValueError: If `force_checksums_validation` is set but no expected info
      is registered for `url`.
    NonMatchingChecksumError: If both infos are available and differ.
  """
  # If force-checksums validations, both expected and computed url_info
  # should exist.
  if force_checksums_validation:
    # Checksum of the downloaded file unknown (for manually downloaded file)
    if not computed_url_info:
      # NOTE(review): the value read here is compared with
      # `expected_url_info` below, so `utils.read_checksum_digest` is assumed
      # to return something comparable with `checksums.UrlInfo`. If it
      # returns a plain `(hexdigest, size)` tuple the comparison can never
      # succeed -- confirm against the helper.
      computed_url_info = utils.read_checksum_digest(path)
    # Checksums have not been registered
    if not expected_url_info:
      raise ValueError(
          f'Missing checksums url: {url}, yet '
          '`force_checksums_validation=True`. '
          'Did you forgot to register checksums ?'
      )

  if (
      expected_url_info
      and computed_url_info
      and expected_url_info != computed_url_info
  ):
    # The trailing space after "Got: ..." keeps the two sentences apart once
    # the adjacent literals are concatenated.
    msg = (
        f'Artifact {url}, downloaded to {path}, has wrong checksum. '
        f'Expected: {expected_url_info}. Got: {computed_url_info}. '
        'To debug, see: '
        'https://www.tensorflow.org/datasets/overview#fixing_nonmatchingchecksumerror'
    )
    raise NonMatchingChecksumError(msg)
コード例 #5
0
 def _sync_file_copy(
     self,
     filepath: str,
     destination_path: str,
 ) -> DownloadResult:
     """Copy `filepath` into `destination_path` and report on the copy.

     Args:
       filepath: Path of the file to copy.
       destination_path: Directory receiving the copy; the base name of
         `filepath` is kept.

     Returns:
       A `DownloadResult` whose `path` points at the copied file and whose
       `url_info` carries the checksum, size and file name of the copy. The
       checksum and size come from `utils.read_checksum_digest` with
       `self._checksumer_cls` as the checksum class.
     """
     base_name = os.path.basename(filepath)
     copied_path = os.path.join(destination_path, base_name)
     tf.io.gfile.copy(filepath, copied_path)
     digest, num_bytes = utils.read_checksum_digest(
         copied_path, checksum_cls=self._checksumer_cls)
     result_path = utils.as_path(copied_path)
     info = checksums_lib.UrlInfo(
         checksum=digest,
         size=num_bytes,
         filename=base_name,
     )
     return DownloadResult(path=result_path, url_info=info)
コード例 #6
0
    def _check_manually_downloaded(self, url: str) -> Optional[str]:
        """Checks if file is already downloaded in manual_dir."""
        if not self._manual_dir:  # Manual dir not passed
            return None

        url_info = self._url_infos.get(url)
        if not url_info or not url_info.filename:  # Filename unknown.
            return None

        manual_path = self._manual_dir / url_info.filename
        if not manual_path.exists():  # File not manually downloaded
            return None

        # Eventually check the checksums
        if self._force_checksums_validation:
            checksum, _ = utils.read_checksum_digest(manual_path)
            if checksum != url_info.checksum:
                raise NonMatchingChecksumError(url, manual_path)

        return os.fspath(manual_path)
コード例 #7
0
def _validate_checksums(
    url: str,
    path: ReadOnlyPath,
    computed_url_info: Optional[checksums.UrlInfo],
    expected_url_info: Optional[checksums.UrlInfo],
    force_checksums_validation: bool,
) -> None:
    """Check that the computed url info agrees with the registered one.

    Args:
        url: Url the artifact was fetched from.
        path: Location of the downloaded artifact.
        computed_url_info: Info computed for the downloaded file, or a falsy
            value when unknown (e.g. manually downloaded file).
        expected_url_info: Registered info for `url`, or a falsy value when
            nothing has been registered.
        force_checksums_validation: If True, a missing `computed_url_info`
            is read from `path` and a missing `expected_url_info` is an
            error.

    Raises:
        ValueError: If validation is forced but no expected info exists.
        NonMatchingChecksumError: If both infos are available and differ.
    """
    if force_checksums_validation:
        # Forced validation needs both sides: read the missing computed info
        # from disk first, then insist on a registered expectation.
        # NOTE(review): assumes `utils.read_checksum_digest` returns a value
        # comparable with `checksums.UrlInfo` -- confirm.
        computed_url_info = (computed_url_info
                             or utils.read_checksum_digest(path))
        if not expected_url_info:
            missing_msg = (f'Missing checksums url: {url}, yet '
                           '`force_checksums_validation=True`. '
                           'Did you forgot to register checksums ?')
            raise ValueError(missing_msg)

    # Without both pieces of information there is nothing to compare.
    if not expected_url_info or not computed_url_info:
        return
    if expected_url_info != computed_url_info:
        raise NonMatchingChecksumError(url, path)
コード例 #8
0
 def _sync_file_copy(self, filepath, destination_path):
     """Copy `filepath` into `destination_path` and digest the copy.

     Args:
       filepath: Path of the file to copy.
       destination_path: Directory receiving the copy; the base name of
         `filepath` is kept.

     Returns:
       A `(hexdigest, size)` pair as produced by
       `utils.read_checksum_digest` for the copied file, using
       `self._checksumer` as the checksum class.
     """
     base_name = os.path.basename(filepath)
     copied_path = os.path.join(destination_path, base_name)
     tf.io.gfile.copy(filepath, copied_path)
     digest, num_bytes = utils.read_checksum_digest(
         copied_path, checksum_cls=self._checksumer)
     return digest, num_bytes