コード例 #1
0
    def _download(self, resource):
        """Download a resource, reusing an earlier download when possible.

        Args:
          resource: Either a URL string or an object exposing `.url` and
            `.exists_locally(path)`; strings are wrapped in
            `resource_lib.Resource`.

        Returns:
          A promise. It resolves to the final download path when the file is
          reused, otherwise it is the downloader's promise chained with
          `_handle_download_result`.
        """
        if isinstance(resource, six.string_types):
            resource = resource_lib.Resource(url=resource)
        url = resource.url

        if url in self._sizes_checksums:
            # Element 1 of the registered entry is used as the expected sha256.
            cached_path = self._get_final_dl_path(
                url, self._sizes_checksums[url][1])
            reuse_allowed = not self._force_download
            if reuse_allowed and resource.exists_locally(cached_path):
                logging.info('URL %s already downloaded: reusing %s.', url,
                             cached_path)
                self._recorded_sizes_checksums[url] = self._sizes_checksums[
                    url]
                return promise.Promise.resolve(cached_path)

        # Unlike the extractor, which owns its temp directory, the temp
        # directory used by the downloader is created here. The random suffix
        # gives every call its own directory.
        tmp_dirname = '%s.tmp.%s' % (resource_lib.get_dl_dirname(url),
                                     uuid.uuid4().hex)
        tmp_dir_path = os.path.join(self._download_dir, tmp_dirname)
        tf.io.gfile.makedirs(tmp_dir_path)
        logging.info('Downloading %s into %s...', url, tmp_dir_path)

        def on_download_done(result):
            # The downloader's promise yields a (checksum, size) pair.
            checksum, dl_size = result
            return self._handle_download_result(resource, tmp_dir_path,
                                                checksum, dl_size)

        pending = self._downloader.download(url, tmp_dir_path)
        return pending.then(on_download_done)
コード例 #2
0
 def __init__(self, name, url=None):
     """Build a fake artifact whose content and URL derive from `name`.

     Args:
       name: Used to format both the default URL and the content string.
       url: Optional explicit URL; any falsy value selects the default
         `http://foo-bar.ch/<name>` URL.
     """
     if not url:
         url = 'http://foo-bar.ch/%s' % name
     payload = 'content of %s' % name
     digest = _sha256(payload)
     n_chars = len(payload)

     self.url = url
     self.content = payload
     self.size = n_chars
     self.sha = digest
     # The same two values are kept in both orders.
     self.size_checksum = (n_chars, digest)
     self.checksum_size = (digest, n_chars)
     self.dl_fname = resource_lib.get_dl_fname(url, digest)
     self.dl_tmp_dirname = resource_lib.get_dl_dirname(url)
コード例 #3
0
    def _download_and_extract_multipart(
            self,
            dl_manager: tfds.download.DownloadManager,
            url: str,
            parts: int,
            pwd: str = None):
        """Download a multipart zip archive, join its parts and extract it.

        The parts are fetched from `url` suffixed with `.001`, `.002`, ...
        up to `parts`, concatenated into a single file in the download
        directory, and extracted into the extraction directory. Both steps
        are skipped when their output already exists.

        Args:
          dl_manager: Download manager used to fetch the parts; its
            `_download_dir` and `_extract_dir` attributes locate the outputs.
          url: Base URL of the archive, without the part suffix.
          parts: Number of parts the archive is split into.
          pwd: Optional archive password; encoded as UTF-8 before use.

        Returns:
          Path of the directory the archive was extracted into.
        """
        # Function-scope import: only this method needs shutil.
        import shutil

        # Write OpenPose disclaimer
        if self._builder_config.include_pose == "openpose":
            print(_OPENPOSE_DISCLAIMER)

        # Make sure not already downloaded
        dirname = get_dl_dirname(url)
        output_path = os.path.join(dl_manager._download_dir, dirname)
        output_path_extracted = os.path.join(dl_manager._extract_dir, dirname)

        print("output_path", output_path)
        print("output_path_extracted", output_path_extracted)

        if not os.path.isfile(output_path):
            part_urls = [url + f".{i + 1:03}" for i in range(parts)]
            files = dl_manager.download(part_urls)

            # Join the parts in a scratch file and rename it only once it is
            # complete: an interrupted run must not leave a truncated archive
            # that the isfile() check above would later accept as finished.
            incomplete_path = output_path + ".incomplete"
            with open(incomplete_path, "wb") as cat_file:
                for part_path in files:
                    with open(part_path, "rb") as part_file:
                        # Stream in chunks rather than loading a whole part
                        # into memory.
                        shutil.copyfileobj(part_file, cat_file)
            os.replace(incomplete_path, output_path)

        if not os.path.isdir(output_path_extracted):
            # Extract file
            os.makedirs(output_path_extracted)

            pwd_bytes = pwd.encode("utf-8") if pwd is not None else None

            try:
                with ZipFile(output_path, "r") as zip_obj:
                    members = zip_obj.namelist()
                    # Loop over each file
                    for member in tqdm(iterable=members, total=len(members)):
                        zip_obj.extract(member=member,
                                        path=output_path_extracted,
                                        pwd=pwd_bytes)
            except BaseException:
                # Remove the partial output so the isdir() check above does
                # not treat a failed extraction as complete on the next run.
                shutil.rmtree(output_path_extracted, ignore_errors=True)
                raise

        return output_path_extracted
コード例 #4
0
    def _download(self, resource: Union[str, resource_lib.Resource]):
        """Download a resource, or reuse a copy that is already present.

        Args:
          resource: The URL to download, given either as a string or as a
            `resource_lib.Resource`.

        Returns:
          A promise. Depending on the branch taken it resolves with the
          future of the rename task, with the existing path, or it is the
          downloader's promise chained with `_handle_download_result`.
        """
        # Accept plain URL strings as well as Resource objects.
        if isinstance(resource, six.string_types):
            resource = resource_lib.Resource(url=resource)
        url = resource.url

        # Path derived from the sha256 of the URL itself, then look for a
        # copy of the file that is already on disk.
        url_digest = hashlib.sha256(url.encode('utf-8')).hexdigest()
        url_path = self._get_final_dl_path(url, url_digest)
        existing_path = self._find_existing_path(url=url, url_path=url_path)

        # When registering checksums and the file sits at `url_path`, hand the
        # url info recording and the rename over to the executor.
        found_at_url_path = existing_path == url_path
        if self._register_checksums and found_at_url_path:
            logging.info(
                'URL %s already downloaded: Recording checksums from %s.',
                url,
                existing_path,
            )
            rename_task = self._executor.submit(
                self._save_url_info_and_rename,
                url=url,
                url_path=url_path,
                url_info=self._recorded_url_infos[url],
            )
            return promise.Promise.resolve(rename_task)

        # In every other case the url infos are either registered already or
        # get registered by the download callback below.

        # A file that already exists is returned as is.
        if existing_path:
            logging.info('URL %s already downloaded: reusing %s.', url,
                         existing_path)
            return promise.Promise.resolve(existing_path)

        # Nothing on disk: download into a fresh temp directory. Unlike the
        # extractor, which owns its temp directory, the temp directory used by
        # the downloader is created here.
        tmp_dirname = '%s.tmp.%s' % (resource_lib.get_dl_dirname(url),
                                     uuid.uuid4().hex)
        tmp_dir_path = os.path.join(self._download_dir, tmp_dirname)
        tf.io.gfile.makedirs(tmp_dir_path)
        logging.info('Downloading %s into %s...', url, tmp_dir_path)

        pending = self._downloader.download(
            url, tmp_dir_path, verify=self._verify_ssl)
        return pending.then(
            lambda url_info: self._handle_download_result(
                resource=resource,
                tmp_dir_path=tmp_dir_path,
                url_path=url_path,
                url_info=url_info,
            ))
コード例 #5
0
 def test_(self):
   """Check `get_dl_dirname` against the expected name for each URL."""
   cases = zip(self.urls, self.expected)
   for url, want in cases:
     self.assertEqual(resource.get_dl_dirname(url), want)