コード例 #1
0
 def test_extract(self):
     """One file already extracted, one file with NO_EXTRACT, one to extract."""
     # Three resources, one per scenario exercised below.
     resource_cached = resource_lib.Resource(path='/dl_dir/cached',
                                             extract_method=ZIP)
     resource_new = resource_lib.Resource(path='/dl_dir/new',
                                          extract_method=TAR)
     resource_noextract = resource_lib.Resource(path='/dl_dir/noextract',
                                                extract_method=NO_EXTRACT)
     files = {
         'cached': resource_cached,
         'new': resource_new,
         'noextract': resource_noextract,
     }
     # Simulate that `cached` was already extracted on a previous run.
     self.existing_paths.append('/extract_dir/ZIP.%s' %
                                resource_cached.fname)
     # `new` goes through the mocked extractor; its promise resolves once
     # `extracted_new` is set.
     extracted_new, self.extract_results[
         '/dl_dir/%s' % resource_new.fname] = (
             _get_promise_on_event('/extract_dir/TAR.new'))
     manager = self._get_manager()
     extracted_new.set()
     res = manager.extract(files)
     expected = {
         'cached': '/extract_dir/ZIP.%s' % resource_cached.fname,
         'new': '/extract_dir/TAR.%s' % resource_new.fname,
         # NO_EXTRACT resources are returned as their download path.
         'noextract': '/dl_dir/%s' % resource_noextract.fname,
     }
     self.assertEqual(res, expected)
コード例 #2
0
 def test_download_and_extract(self):
     """Download two urls; only the zip one goes through extraction."""
     url_a = 'http://a/a.zip'
     url_b = 'http://b/b'
     sha_contenta = _sha256('content from a.zip')
     sha_contentb = _sha256('content from b')
     resource_a = resource_lib.Resource(url=url_a)
     resource_a.sha256 = sha_contenta
     resource_b = resource_lib.Resource(url=url_b)
     resource_b.sha256 = sha_contentb
     self.file_names[resource_a.fname] = 'a.zip'
     # Mocked results: each download resolves to a (sha256, size) pair
     # once its event is set.
     dl_a, self.dl_results[url_a] = _get_promise_on_event(
         (sha_contenta, 10))
     dl_b, self.dl_results[url_b] = _get_promise_on_event(
         (sha_contentb, 10))
     ext_a, self.extract_results['/dl_dir/%s' %
                                 resource_a.fname] = (_get_promise_on_event(
                                     '/extract_dir/ZIP.%s' %
                                     resource_a.fname))
     # url_b doesn't need any extraction.
     for event in [dl_a, dl_b, ext_a]:
         event.set()
     manager = self._get_manager()
     manager._checksums[url_a] = sha_contenta
     manager._checksums[url_b] = sha_contentb
     res = manager.download_and_extract({'a': url_a, 'b': url_b})
     expected = {
         'a': '/extract_dir/ZIP.%s' % resource_a.fname,
         'b': '/dl_dir/%s' % resource_b.fname,
     }
     self.assertEqual(res, expected)
コード例 #3
0
 def test_download(self):
     """One file in cache, one not."""
     urls = {
         'cached': resource_lib.Resource(url='http://a.ch/a'),
         'new': resource_lib.Resource(url='https://a.ch/b'),
         # INFO file of c has been deleted:
         'info_deleted': resource_lib.Resource(url='https://a.ch/c'),
     }
     urla_sha256 = _sha256('http://a.ch/a')
     urlb_sha256 = _sha256('https://a.ch/b')
     urlc_sha256 = _sha256('https://a.ch/c')
     # Pre-populate the fake filesystem: `a` is fully cached (payload +
     # INFO file); `c` has only the payload, so it is downloaded again.
     _ = [
         self._add_file(path) for path in [
             '/dl_dir/%s' % urla_sha256,
             '/dl_dir/%s.INFO' % urla_sha256,
             '/dl_dir/%s' % urlc_sha256,
         ]
     ]
     downloaded_b, self.dl_results[
         'https://a.ch/b'] = _get_promise_on_event(('sha_b', 10))
     downloaded_c, self.dl_results[
         'https://a.ch/c'] = _get_promise_on_event(('sha_c', 10))
     manager = self._get_manager()
     # async_=True returns an unfulfilled promise until the download
     # events below are set.
     res = manager.download(urls, async_=True)
     self.assertFalse(res.is_fulfilled)
     downloaded_b.set()
     downloaded_c.set()
     downloads = res.get()
     self.assertEqual(
         downloads, {
             'cached': '/dl_dir/%s' % urla_sha256,
             'new': '/dl_dir/%s' % urlb_sha256,
             'info_deleted': '/dl_dir/%s' % urlc_sha256,
         })
コード例 #4
0
 def __init__(self, name, url=None):
     """Build a fake downloadable artifact called *name*.

     Populates url/content/size/sha attributes plus two `Resource`
     objects for use in download-manager tests: one plain, one with
     its sha256 already attached.
     """
     if not url:
         url = 'http://foo-bar.ch/%s' % name
     body = 'content of %s' % name
     self.url = url
     self.content = body
     self.size = len(body)
     self.sha = _sha256(body)
     # Both tuple orderings are kept because different call sites
     # expect (size, sha) vs (sha, size).
     self.size_checksum = (self.size, self.sha)
     self.checksum_size = (self.sha, self.size)
     self.resource = resource_lib.Resource(url=url)
     self.resource_sha = resource_lib.Resource(url=url)
     self.resource_sha.sha256 = self.sha
コード例 #5
0
 def _test_extract(self, method, archive_name, expected_files):
   from_path = os.path.join(self.test_data, 'archives', archive_name)
   resource = resource_lib.Resource(path=from_path, extract_method=method)
   self.extractor.extract(resource, self.to_path).get()
   for name, content in expected_files.items():
     path = os.path.join(self.to_path, name)
     self.assertEqual(_read(path), content, 'File %s has bad content.' % path)
コード例 #6
0
 def test_gzip(self):
   from_path = os.path.join(self.test_data, 'archives', 'arch1.tar.gz')
   resource = resource_lib.Resource(
       path=from_path, extract_method=resource_lib.ExtractMethod.GZIP)
   self.extractor.extract(resource, self.to_path).get()
   arch1_path = os.path.join(self.test_data, 'archives', 'arch1.tar')
   self.assertEqual(_read(self.to_path), _read(arch1_path))
コード例 #7
0
ファイル: extractor_test.py プロジェクト: shunsunsun/datasets
 def test_bzip2(self):
   from_path = os.path.join(self.test_data, 'archives', 'foo.csv.bz2')
   resource = resource_lib.Resource(
       path=from_path, extract_method=resource_lib.ExtractMethod.BZIP2)
   self.extractor.extract(resource, self.to_path).get()
   foo_csv_path = os.path.join(self.test_data, 'foo.csv')
   self.assertEqual(_read(self.to_path), _read(foo_csv_path))
コード例 #8
0
 def test_force_download_and_extract(self):
   url = 'http://a/b.tar.gz'
   resource_ = resource_lib.Resource(url=url)
   resource_.sha256 = _sha256('content of file')
   # resource was already downloaded / extracted:
   self.existing_paths = ['/dl_dir/%s' % resource_.fname,
                          '/extract_dir/TAR_GZ.%s' % resource_.fname]
   self.file_names[resource_.fname] = 'b.tar.gz'
   self._write_info('/dl_dir/%s.INFO' % resource_.fname,
                    {'original_fname': 'b.tar.gz'})
   dl_a, self.dl_results[url] = _get_promise_on_event((resource_.sha256, 10))
   ext_a, self.extract_results['/dl_dir/%s' % resource_.fname] = (
       _get_promise_on_event('/extract_dir/TAR_GZ.%s' % resource_.fname))
   dl_a.set()
   ext_a.set()
   manager = self._get_manager(force_download=True, force_extraction=True,
                               checksums={url: resource_.sha256})
   res = manager.download_and_extract(url)
   expected = '/extract_dir/TAR_GZ.%s' % resource_.fname
   self.assertEqual(expected, res)
   # Rename after download:
   (from_, to), kwargs = self.gfile.rename.call_args
   self.assertTrue(re.match(
       r'/dl_dir/%s\.tmp\.[a-h0-9]{32}/b.tar.gz' % resource_.fname, from_))
   self.assertEqual('/dl_dir/%s' % resource_.fname, to)
   self.assertEqual(kwargs, {'overwrite': True})
   self.assertEqual(1, self.downloader_download.call_count)
   self.assertEqual(1, self.extractor_extract.call_count)
コード例 #9
0
ファイル: downloader_test.py プロジェクト: mbbessa/datasets
 def setUp(self):
     """Create a downloader whose HTTP(S) transport is fully mocked."""
     super(DownloaderTest, self).setUp()
     self.addCleanup(mock.patch.stopall)
     self.downloader = downloader.get_downloader(10, hashlib.sha256)
     self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir())
     self.url = 'http://example.com/foo.tar.gz'
     self.resource = resource_lib.Resource(url=self.url)
     self.path = os.path.join(self.tmp_dir, 'foo.tar.gz')
     self.incomplete_path = '%s.incomplete' % self.path
     self.response = b'This \nis an \nawesome\n response!'
     self.resp_checksum = hashlib.sha256(self.response).hexdigest()
     self.cookies = {}

     def fake_transport(*unused_args, **unused_kwargs):
         # Serve the canned payload regardless of the requested url.
         return _FakeResponse(self.url, self.response, self.cookies)

     # Both requests.Session.get and urllib's urlopen go through the fake.
     mock.patch.object(
         downloader.requests.Session, 'get', fake_transport).start()
     mock.patch.object(
         downloader.urllib.request, 'urlopen', fake_transport).start()
     self.downloader._pbar_url = mock.MagicMock()
     self.downloader._pbar_dl_size = mock.MagicMock()
コード例 #10
0
    def _download(self, resource):
        """Download resource, returns Promise->path to downloaded file.

        Args:
          resource: `Resource` to download, or a url string.

        Returns:
          Promise resolving to the path of the downloaded file.
        """
        if isinstance(resource, six.string_types):
            resource = resource_lib.Resource(url=resource)
        url = resource.url
        # Reuse a previous download only when the url has a registered
        # checksum and the corresponding file already exists locally.
        if url in self._sizes_checksums:
            expected_sha256 = self._sizes_checksums[url][1]
            download_path = self._get_final_dl_path(url, expected_sha256)
            if not self._force_download and resource.exists_locally(
                    download_path):
                logging.info('URL %s already downloaded: reusing %s.', url,
                             download_path)
                self._recorded_sizes_checksums[url] = self._sizes_checksums[
                    url]
                return promise.Promise.resolve(download_path)
        # There is a slight difference between downloader and extractor here:
        # the extractor manages its own temp directory, while the DownloadManager
        # manages the temp directory of downloader.
        download_dir_path = os.path.join(
            self._download_dir,
            '%s.tmp.%s' % (resource_lib.get_dl_dirname(url), uuid.uuid4().hex))
        tf.io.gfile.makedirs(download_dir_path)
        logging.info('Downloading %s into %s...', url, download_dir_path)

        def callback(val):
            # `val` is the (checksum, download size) pair from the downloader.
            checksum, dl_size = val
            return self._handle_download_result(resource, download_dir_path,
                                                checksum, dl_size)

        return self._downloader.download(url, download_dir_path).then(callback)
コード例 #11
0
 def setUp(self):
     """Mock the network transport, progress bars and SSL contexts."""
     self.addCleanup(absltest.mock.patch.stopall)
     self.downloader = downloader.get_downloader(10, hashlib.sha256)
     self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir())
     self.url = 'http://example.com/foo.tar.gz'
     self.resource = resource_lib.Resource(url=self.url)
     self.path = os.path.join(self.tmp_dir, 'foo.tar.gz')
     self.incomplete_path = '%s.incomplete' % self.path
     self.response = b'This \nis an \nawesome\n response!'
     self.resp_checksum = hashlib.sha256(self.response).hexdigest()
     self.cookies = {}
     # Every HTTP GET serves the canned `_FakeResponse`, whatever the url.
     absltest.mock.patch.object(
         downloader.requests.Session,
         'get',
         lambda *a, **kw: _FakeResponse(self.url, self.response, self.
                                        cookies),
     ).start()
     self.downloader._pbar_url = absltest.mock.MagicMock()
     self.downloader._pbar_dl_size = absltest.mock.MagicMock()
     # urllib.request.urlopen is faked the same way as requests.
     absltest.mock.patch.object(
         downloader.urllib.request,
         'urlopen',
         lambda *a, **kw: _FakeResponse(self.url, self.response, self.
                                        cookies),
     ).start()
     if not hasattr(downloader.ssl, '_create_unverified_context'):
         # To not throw error for python<=2.7.8 while mocking SSLContext functions
         downloader.ssl.__dict__['_create_unverified_context'] = None
         downloader.ssl.__dict__['create_default_context'] = None
     # dummy ssl contexts returned for testing
     absltest.mock.patch.object(downloader.ssl,
                                '_create_unverified_context',
                                lambda *a, **kw: 'skip_ssl').start()
     absltest.mock.patch.object(downloader.ssl, 'create_default_context',
                                lambda *a, **kw: 'use_ssl').start()
コード例 #12
0
    def _download(self, resource):
        """Download resource, returns Promise->path to downloaded file.

        Args:
          resource: `Resource` to download, or a url string.

        Returns:
          Promise resolving to the path of the downloaded file.
        """
        if isinstance(resource, six.string_types):
            resource = resource_lib.Resource(url=resource)
        # Attach the expected checksum (None when the url is not registered).
        resource.sha256 = self._checksums.get(resource.url, None)
        if not resource.path:
            resource.path = os.path.join(self._download_dir, resource.fname)
        if not self._force_download and resource.exists_locally():
            logging.info('URL %s already downloaded: reusing %s.',
                         resource.url, resource.path)
            if self._record_checksum_size:
                # Recompute checksum/size of the cached file so it can be
                # recorded even though no download happened.
                logging.info('Reading checksum and size of %s ...',
                             resource.path)
                checksum, dl_size = utils.read_checksum_digest(resource.path)
                self._handle_download_result(resource,
                                             None,
                                             checksum,
                                             dl_size,
                                             existing=True)
            return promise.Promise.resolve(resource.path)
        # There is a slight difference between downloader and extractor here:
        # the extractor manages its own temp directory, while the DownloadManager
        # manages the temp directory of downloader.
        tmp_dir_path = '%s.tmp.%s' % (resource.path, uuid.uuid4().hex)
        tf.io.gfile.makedirs(tmp_dir_path)
        logging.info('Downloading %s into %s...', resource.url, tmp_dir_path)

        def callback(val):
            # `val` is the (checksum, download size) pair from the downloader.
            checksum, dl_size = val
            return self._handle_download_result(resource, tmp_dir_path,
                                                checksum, dl_size)

        return self._downloader.download(resource, tmp_dir_path).then(callback)
コード例 #13
0
  def setUp(self):
    """Mock HTTP transport, progress bars, and FTP retrieval."""
    self.addCleanup(absltest.mock.patch.stopall)
    self.downloader = downloader.get_downloader(10, hashlib.sha256)
    self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir())
    self.url = 'http://example.com/foo.tar.gz'
    self.resource = resource_lib.Resource(url=self.url)
    self.path = os.path.join(self.tmp_dir, 'foo.tar.gz')
    self.incomplete_path = '%s.incomplete' % self.path
    self.response = b'This \nis an \nawesome\n response!'
    self.resp_checksum = hashlib.sha256(self.response).hexdigest()
    self.cookies = {}
    # Every HTTP GET serves the canned `_FakeResponse`, whatever the url.
    absltest.mock.patch.object(
        downloader.requests.Session,
        'get',
        lambda *a, **kw: _FakeResponse(self.url, self.response, self.cookies),
    ).start()
    self.downloader._pbar_url = absltest.mock.MagicMock()
    self.downloader._pbar_dl_size = absltest.mock.MagicMock()

    def write_fake_ftp_result(_, filename):
      # Mimics urllib.request.urlretrieve: writes the payload to `filename`.
      with open(filename, 'wb') as result:
        result.write(self.response)

    absltest.mock.patch.object(
        downloader.urllib.request,
        'urlretrieve',
        write_fake_ftp_result,
    ).start()
コード例 #14
0
 def test_absolute_path(self):
   from_path = os.path.join(self.test_data, 'archives', 'absolute_path.tar')
   resource = resource_lib.Resource(
       path=from_path, extract_method=resource_lib.ExtractMethod.TAR)
   promise = self.extractor.extract(resource, self.to_path)
   with self.assertRaisesRegex(extractor.ExtractError,
                               'Archive at .* is not safe'):
     promise.get()
コード例 #15
0
 def test_gzip2(self):
   # Same as previous test, except it is not a .tar.gz, but a .gz.
   from_path = os.path.join(self.test_data, 'archives', 'foo.csv.gz')
   resource = resource_lib.Resource(
       path=from_path, extract_method=resource_lib.ExtractMethod.GZIP)
   self.extractor.extract(resource, self.to_path).get()
   foo_csv_path = os.path.join(self.test_data, 'foo.csv')
   self.assertEqual(_read(self.to_path), _read(foo_csv_path))
コード例 #16
0
 def _download_extract(self, resource):
   """Download-extract `Resource` or url, returns Promise->path."""
   if isinstance(resource, six.string_types):
     resource = resource_lib.Resource(url=resource)
   def callback(path):
     resource.path = path
     return self._extract(resource)
   return self._download(resource).then(callback)
コード例 #17
0
 def test_ftp(self):
     """FTP download writes the payload and removes the .incomplete file."""
     resource = resource_lib.Resource(
         url='ftp://*****:*****@example.com/foo.tar.gz')
     dl_promise = self.downloader.download(resource, self.tmp_dir)
     checksum, _ = dl_promise.get()
     self.assertEqual(checksum, self.resp_checksum)
     with open(self.path, 'rb') as downloaded:
         self.assertEqual(downloaded.read(), self.response)
     self.assertFalse(tf.io.gfile.exists(self.incomplete_path))
コード例 #18
0
 def test_drive_no_cookies(self):
   resource = resouce_lib.Resource(
       url='https://drive.google.com/uc?export=download&id=a1b2bc3')
   promise = self.downloader.download(resource, self.tmp_dir)
   checksum, _ = promise.get()
   self.assertEqual(checksum, self.resp_checksum)
   with open(self.path, 'rb') as result:
     self.assertEqual(result.read(), self.response)
   self.assertFalse(tf.io.gfile.exists(self.incomplete_path))
コード例 #19
0
ファイル: base.py プロジェクト: jackd/shape-tfds
def get_modelnet40_aligned_data_dir(dl_manager=None):
    """Download and extract the manually-aligned ModelNet40 archive.

    Args:
      dl_manager: optional download manager; a default one is created
        for the "modelnet40_aligned" dataset when not provided.

    Returns:
      Path to the folder containing the extracted archive.
    """
    dl_manager = dl_manager or get_dl_manager(
        dataset_name="modelnet40_aligned")
    path = dl_manager.download(
        "https://lmb.informatik.uni-freiburg.de/resources/datasets/ORION/modelnet40_manually_aligned.tar"
    )
    # NOTE(review): the url ends in `.tar` but extraction uses TAR_GZ —
    # presumably the served file is gzip-compressed despite its name;
    # confirm before changing.
    folder = dl_manager.extract(
        resource_lib.Resource(
            path=path, extract_method=resource_lib.ExtractMethod.TAR_GZ))
    return folder
コード例 #20
0
 def test_kaggle_api(self):
     """Kaggle downloads report the size of the file actually written."""
     fname = 'a.csv'
     with testing.mock_kaggle_api(filenames=[fname, 'b.txt']):
         resource = resource_lib.Resource(
             url='kaggle://some-competition/a.csv')
         dl_promise = self.downloader.download(resource, self.tmp_dir)
         _, dl_size = dl_promise.get()
         # The fake kaggle API writes the filename as file content, so
         # the reported size equals len(fname).
         self.assertEqual(dl_size, len(fname))
         with tf.io.gfile.GFile(os.path.join(self.tmp_dir, fname)) as f:
             self.assertEqual(fname, f.read())
コード例 #21
0
 def test_wrong_method(self):
   from_path = os.path.join(self.test_data, 'archives', 'foo.csv.gz')
   resource = resource_lib.Resource(
       path=from_path, extract_method=resource_lib.ExtractMethod.ZIP,
       url='http://example.com/foo.zip')
   promise = self.extractor.extract(resource, self.to_path)
   expected_msg = re.escape(
       'foo.csv.gz (http://example.com/foo.zip): File is not a zip file.')
   with self.assertRaisesRegex(extractor.ExtractError, expected_msg):
     promise.get()
コード例 #22
0
 def test_ftp_error(self):
     """Errors raised by urlretrieve propagate through the promise."""
     ftp_error = downloader.urllib.error.URLError('Problem serving file.')
     patcher = absltest.mock.patch.object(
         downloader.urllib.request, 'urlretrieve', side_effect=ftp_error)
     patcher.start()
     res = resource_lib.Resource(url='ftp://example.com/foo.tar.gz')
     dl_promise = self.downloader.download(res, self.tmp_dir)
     # The URLError only surfaces when the promise result is requested.
     with self.assertRaises(downloader.urllib.error.URLError):
         dl_promise.get()
コード例 #23
0
  def test_extract(self):
    """One file already extracted, one file with NO_EXTRACT, one to extract."""
    cached = resource_lib.Resource(path='/dl_dir/cached', extract_method=ZIP)
    new_ = resource_lib.Resource(path='/dl_dir/new', extract_method=TAR)
    no_extract = resource_lib.Resource(path='/dl_dir/noextract',
                                       extract_method=NO_EXTRACT)
    # `cached` already has an extraction result on the fake filesystem.
    self.fs.add_file('/extract_dir/ZIP.cached')
    self.extract_results['/dl_dir/new'] = '/extract_dir/TAR.new'
    manager = self._get_manager()
    res = manager.extract({
        'cached': cached,
        'new': new_,
        'noextract': no_extract,
    })
    expected = _as_path({
        'cached': '/extract_dir/ZIP.cached',
        'new': '/extract_dir/TAR.new',
        # NO_EXTRACT resources keep their download path.
        'noextract': '/dl_dir/noextract',
    })
    self.assertEqual(res, expected)
    # Only `new` should actually have gone through the extractor.
    self.assertCountEqual(self.extracted_paths, [_as_path('/dl_dir/new')])
コード例 #24
0
 def test_download(self):
     """One file in cache, one not."""
     urls = {
         'cached': resource_lib.Resource(url='http://a.ch/a'),
         'new': resource_lib.Resource(url='https://a.ch/b'),
         # INFO file of c has been deleted:
         'info_deleted': resource_lib.Resource(url='https://a.ch/c'),
     }
     afname = resource_lib.Resource(url='http://a.ch/a').fname
     bfname = resource_lib.Resource(url='https://a.ch/b').fname
     cfname = resource_lib.Resource(url='https://a.ch/c').fname
     # `a` is fully cached (payload + INFO); `c` lost its INFO file, so
     # it must be downloaded again.
     _ = [
         self._add_file(path) for path in [  # pylint: disable=g-complex-comprehension
             '/dl_dir/%s' % afname,
             '/dl_dir/%s.INFO' % afname,
             '/dl_dir/%s' % cfname,
         ]
     ]
     downloaded_b, self.dl_results[
         'https://a.ch/b'] = _get_promise_on_event(('sha_b', 10))
     downloaded_c, self.dl_results[
         'https://a.ch/c'] = _get_promise_on_event(('sha_c', 10))
     manager = self._get_manager()
     downloaded_b.set()
     downloaded_c.set()
     downloads = manager.download(urls)
     expected = {
         'cached': '/dl_dir/%s' % afname,
         'new': '/dl_dir/%s' % bfname,
         'info_deleted': '/dl_dir/%s' % cfname,
     }
     self.assertEqual(downloads, expected)
コード例 #25
0
ファイル: builder.py プロジェクト: jtpils/ige
def _get_base_resource(manual_dir):
    """Return a ZIP `Resource` for the manually-downloaded h36m archive.

    Args:
      manual_dir: directory where `h36m.zip` is expected to live.

    Returns:
      `resource_lib.Resource` pointing at the archive, to be extracted as ZIP.

    Raises:
      AssertionError: if the archive is missing, with instructions on how
        to fetch it manually.
    """
    # dl_manager doesn't like dropbox apparently...
    path = os.path.join(manual_dir, "h36m.zip")
    if not tf.io.gfile.exists(path):
        if not tf.io.gfile.exists(manual_dir):
            tf.io.gfile.makedirs(manual_dir)
        url = "https://www.dropbox.com/s/e35qv3n6zlkouki/h36m.zip"
        ex = "wget -O %s %s" % (path, url)
        # Fixed message: the original adjacent string literals concatenated
        # as "...place it at %se.g." with no separator.
        msg = ("Please manually download files from %s and place it at %s, "
               "e.g.\n%s" % (url, path, ex))
        raise AssertionError(msg)
    return resource_lib.Resource(path=path,
                                 extract_method=resource_lib.ExtractMethod.ZIP)
コード例 #26
0
 def _extract(self, resource):
     """Extract a single archive, returns Promise->path to extraction result.

     Args:
       resource: `Resource` to extract, or a path string.

     Returns:
       Promise resolving to the path of the extraction result.
     """
     if isinstance(resource, six.string_types):
         resource = resource_lib.Resource(path=resource)
     if resource.extract_method == resource_lib.ExtractMethod.NO_EXTRACT:
         logging.info('Skipping extraction for %s (method=NO_EXTRACT).',
                      resource.path)
         return promise.Promise.resolve(resource.path)
     extract_path = os.path.join(self._extract_dir, resource.extract_fname)
     # Reuse an existing extraction unless forced to redo it.
     if not self._force_extraction and tf.io.gfile.exists(extract_path):
         logging.info('Reusing extraction of %s at %s.', resource.path,
                      extract_path)
         return promise.Promise.resolve(extract_path)
     return self._extractor.extract(resource, extract_path)
コード例 #27
0
 def test_download_and_extract_already_downloaded(self):
   url_a = 'http://a/a.zip'
   resource_a = resource_lib.Resource(url=url_a)
   self.file_names[resource_a.fname] = 'a.zip'
   # File was already downloaded:
   self._add_file('/dl_dir/%s' % resource_a.fname)
   self._write_info('/dl_dir/%s.INFO' % resource_a.fname,
                    {'original_fname': 'a.zip'})
   ext_a, self.extract_results['/dl_dir/%s' % resource_a.fname] = (
       _get_promise_on_event('/extract_dir/ZIP.%s' % resource_a.fname))
   ext_a.set()
   manager = self._get_manager()
   res = manager.download_and_extract(url_a)
   self.assertEqual(res, '/extract_dir/ZIP.%s' % resource_a.fname)
コード例 #28
0
  def test_download(self):
    """One file in cache, one not."""
    resource_a = resource_lib.Resource(url='http://a.ch/a')
    resource_a.sha256 = _sha256('some content')
    resource_b = resource_lib.Resource(url='http://a.ch/b')
    resource_b.sha256 = _sha256('content of b')
    resource_c = resource_lib.Resource(url='http://a.ch/c')
    resource_c.sha256 = _sha256('content of c')
    urls = {
        'cached': resource_a,
        'new': resource_lib.Resource(url='https://a.ch/b'),
        # INFO file of c has been deleted:
        'info_deleted': resource_lib.Resource(url='https://a.ch/c'),
    }
    # `a` is fully cached (payload + INFO); `c` lost its INFO file, so
    # it must be downloaded again.
    _ = [self._add_file(path, content) for path, content in [  # pylint: disable=g-complex-comprehension
        ('/dl_dir/%s' % resource_a.fname, 'content of a'),
        ('/dl_dir/%s.INFO' % resource_a.fname, 'content of info file for a'),
        ('/dl_dir/%s' % resource_c.fname, 'content of c'),
    ]]
    downloaded_b, self.dl_results['https://a.ch/b'] = _get_promise_on_event(
        (_sha256('content of b'), 10))
    downloaded_c, self.dl_results['https://a.ch/c'] = _get_promise_on_event(
        (_sha256('content of c'), 10))
    manager = self._get_manager(checksums={
        resource_a.url: resource_a.sha256,
        'https://a.ch/b': resource_b.sha256,
        'https://a.ch/c': resource_c.sha256,
    })
    downloaded_b.set()
    downloaded_c.set()
    downloads = manager.download(urls)
    expected = {
        'cached': '/dl_dir/%s' % resource_a.fname,
        'new': '/dl_dir/%s' % resource_b.fname,
        'info_deleted': '/dl_dir/%s' % resource_c.fname,
    }
    self.assertEqual(downloads, expected)
コード例 #29
0
    def iter_archive(self, resource):
        """Returns iterator over files within archive.

        **Important Note**: caller should read files as they are yielded.
        Reading out of order is slow.

        Args:
          resource: path to archive or `tfds.download.Resource`.

        Returns:
          Generator yielding tuple (path_within_archive, file_obj).
        """
        # Accept a plain path string and wrap it into a Resource.
        if isinstance(resource, six.string_types):
            resource = resource_lib.Resource(path=resource)
        return extractor.iter_archive(resource.path, resource.extract_method)
コード例 #30
0
  def _extract(self, resource: ExtractPath) -> promise.Promise[ReadOnlyPath]:
    """Extract a single archive, returns Promise->path to extraction result.

    Args:
      resource: `Resource` to extract, or a path-like object.

    Returns:
      Promise resolving to the path of the extraction result.
    """
    if isinstance(resource, type_utils.PathLikeCls):
      resource = resource_lib.Resource(path=resource)
    path = resource.path
    extract_method = resource.extract_method
    if extract_method == resource_lib.ExtractMethod.NO_EXTRACT:
      logging.info('Skipping extraction for %s (method=NO_EXTRACT).', path)
      return promise.Promise.resolve(path)
    # The extraction result lives at <extract_dir>/<METHOD>.<archive_name>.
    method_name = resource_lib.ExtractMethod(extract_method).name
    extract_path = self._extract_dir / f'{method_name}.{path.name}'
    # Reuse an existing extraction unless forced to redo it.
    if not self._force_extraction and extract_path.exists():
      logging.info('Reusing extraction of %s at %s.', path, extract_path)
      return promise.Promise.resolve(extract_path)
    return self._extractor.extract(path, extract_method, extract_path)