def setUp(self):
  """Stubs out all network access and progress bars for the downloader."""
  super(DownloaderTest, self).setUp()
  self.addCleanup(mock.patch.stopall)
  self.downloader = downloader.get_downloader(10, hashlib.sha256)
  self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir())
  self.url = 'http://example.com/foo.tar.gz'
  self.resource = resource_lib.Resource(url=self.url)
  self.path = os.path.join(self.tmp_dir, 'foo.tar.gz')
  self.incomplete_path = '%s.incomplete' % self.path
  self.response = b'This \nis an \nawesome\n response!'
  self.resp_checksum = hashlib.sha256(self.response).hexdigest()
  self.cookies = {}

  def fake_response(*unused_args, **unused_kwargs):
    # Every HTTP request (requests and urllib alike) gets the canned payload.
    return _FakeResponse(self.url, self.response, self.cookies)

  mock.patch.object(
      downloader.requests.Session, 'get', fake_response).start()
  self.downloader._pbar_url = mock.MagicMock()
  self.downloader._pbar_dl_size = mock.MagicMock()
  mock.patch.object(
      downloader.urllib.request, 'urlopen', fake_response).start()
def __init__(self,
             dataset_name,
             download_dir=None,
             extract_dir=None,
             manual_dir=None,
             checksums=None,
             force_download=False,
             force_extraction=False):
  """Download manager constructor.

  Args:
    dataset_name: `str`, name of dataset this instance will be used for.
    download_dir: `str`, path to directory where downloads are stored.
    extract_dir: `str`, path to directory where artifacts are extracted.
      Defaults to `<download_dir>/extracted`.
    manual_dir: `str`, path to manually downloaded/extracted data directory.
      Optional; left as `None` when not provided.
    checksums: `dict<str url, str sha256>`, url to sha256 of resource. Only
      URLs present are checked.
    force_download: `bool`, default to False. If True, always [re]download.
    force_extraction: `bool`, default to False. If True, always [re]extract.
  """
  self._dataset_name = dataset_name
  self._checksums = checksums or {}
  self._recorded_download_checksums = {}
  self._download_sizes = {}
  self._download_dir = os.path.expanduser(download_dir)
  # `os.path.expanduser(None)` raises TypeError, so resolve the documented
  # default (`<download_dir>/extracted`) before expanding.
  self._extract_dir = os.path.expanduser(
      extract_dir or os.path.join(download_dir, 'extracted'))
  # manual_dir is optional: keep it as None instead of crashing in
  # expanduser when the caller did not provide one.
  self._manual_dir = manual_dir and os.path.expanduser(manual_dir)
  tf.gfile.MakeDirs(self._download_dir)
  tf.gfile.MakeDirs(self._extract_dir)
  self._force_download = force_download
  self._force_extraction = force_extraction
  self._extractor = extractor.get_extractor()
  self._downloader = downloader.get_downloader()
def setUp(self):
  """Replaces HTTP and FTP transports with canned in-memory responses."""
  self.addCleanup(absltest.mock.patch.stopall)
  self.downloader = downloader.get_downloader(10, hashlib.sha256)
  self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir())
  self.url = 'http://example.com/foo.tar.gz'
  self.resource = resource_lib.Resource(url=self.url)
  self.path = os.path.join(self.tmp_dir, 'foo.tar.gz')
  self.incomplete_path = '%s.incomplete' % self.path
  self.response = b'This \nis an \nawesome\n response!'
  self.resp_checksum = hashlib.sha256(self.response).hexdigest()
  self.cookies = {}

  def fake_get(*unused_args, **unused_kwargs):
    # Any `requests` GET yields the canned payload above.
    return _FakeResponse(self.url, self.response, self.cookies)

  absltest.mock.patch.object(
      downloader.requests.Session, 'get', fake_get).start()
  self.downloader._pbar_url = absltest.mock.MagicMock()
  self.downloader._pbar_dl_size = absltest.mock.MagicMock()

  def fake_urlretrieve(unused_url, filename):
    # Simulates an FTP fetch by writing the canned payload to `filename`.
    with open(filename, 'wb') as result:
      result.write(self.response)

  absltest.mock.patch.object(
      downloader.urllib.request, 'urlretrieve', fake_urlretrieve).start()
def setUp(self):
  """Stubs out HTTP transports, progress bars and SSL context creation."""
  self.addCleanup(absltest.mock.patch.stopall)
  self.downloader = downloader.get_downloader(10, hashlib.sha256)
  self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir())
  self.url = 'http://example.com/foo.tar.gz'
  self.resource = resource_lib.Resource(url=self.url)
  self.path = os.path.join(self.tmp_dir, 'foo.tar.gz')
  self.incomplete_path = '%s.incomplete' % self.path
  self.response = b'This \nis an \nawesome\n response!'
  self.resp_checksum = hashlib.sha256(self.response).hexdigest()
  self.cookies = {}

  def fake_response(*unused_args, **unused_kwargs):
    # Every HTTP request (requests and urllib alike) gets the canned payload.
    return _FakeResponse(self.url, self.response, self.cookies)

  absltest.mock.patch.object(
      downloader.requests.Session, 'get', fake_response).start()
  self.downloader._pbar_url = absltest.mock.MagicMock()
  self.downloader._pbar_dl_size = absltest.mock.MagicMock()
  absltest.mock.patch.object(
      downloader.urllib.request, 'urlopen', fake_response).start()
  # Python <= 2.7.8 lacks these ssl attributes; inject placeholders so the
  # mock.patch.object calls below have something to patch.
  if not hasattr(downloader.ssl, '_create_unverified_context'):
    downloader.ssl.__dict__['_create_unverified_context'] = None
    downloader.ssl.__dict__['create_default_context'] = None
  # Dummy SSL contexts returned for testing.
  absltest.mock.patch.object(
      downloader.ssl, '_create_unverified_context',
      lambda *a, **kw: 'skip_ssl').start()
  absltest.mock.patch.object(
      downloader.ssl, 'create_default_context',
      lambda *a, **kw: 'use_ssl').start()
def __init__(self,
             download_dir,
             extract_dir=None,
             manual_dir=None,
             manual_dir_instructions=None,
             dataset_name=None,
             force_download=False,
             force_extraction=False,
             register_checksums=False):
  """Download manager constructor.

  Args:
    download_dir: `str`, path to directory where downloads are stored.
    extract_dir: `str`, path to directory where artifacts are extracted.
      Defaults to `<download_dir>/extracted`.
    manual_dir: `str`, path to manually downloaded/extracted data directory.
    manual_dir_instructions: `str`, human readable instructions on how to
      prepare contents of the manual_dir for this dataset.
    dataset_name: `str`, name of dataset this instance will be used for. If
      provided, downloads will contain which datasets they were used for.
    force_download: `bool`, default to False. If True, always [re]download.
    force_extraction: `bool`, default to False. If True, always [re]extract.
    register_checksums: `bool`, default to False. If True, dl checksums
      aren't checked, but stored into file.
  """
  self._dataset_name = dataset_name
  self._download_dir = os.path.expanduser(download_dir)
  default_extract_dir = os.path.join(download_dir, 'extracted')
  self._extract_dir = os.path.expanduser(extract_dir or default_extract_dir)
  # manual_dir is optional: only expand when provided, otherwise keep the
  # falsy value as-is.
  self._manual_dir = os.path.expanduser(manual_dir) if manual_dir else manual_dir
  self._manual_dir_instructions = manual_dir_instructions
  tf.io.gfile.makedirs(self._download_dir)
  tf.io.gfile.makedirs(self._extract_dir)
  self._force_download = force_download
  self._force_extraction = force_extraction
  self._extractor = extractor.get_extractor()
  self._downloader = downloader.get_downloader()
  self._register_checksums = register_checksums
  # All known URLs: {url: (size, checksum)}
  self._sizes_checksums = checksums.get_all_sizes_checksums()
  # To record what is being used: {url: (size, checksum)}
  self._recorded_sizes_checksums = {}
def __init__(self,
             download_dir,
             extract_dir=None,
             manual_dir=None,
             dataset_name=None,
             checksums=None,
             force_download=False,
             force_extraction=False):
  """Download manager constructor.

  Args:
    download_dir: `str`, path to directory where downloads are stored.
    extract_dir: `str`, path to directory where artifacts are extracted.
      Defaults to `<download_dir>/extracted`.
    manual_dir: `str`, path to manually downloaded/extracted data directory.
    dataset_name: `str`, name of dataset this instance will be used for. If
      provided, downloads will contain which datasets they were used for.
    checksums: `dict<str url, str sha256>`, url to sha256 of resource. Only
      URLs present are checked. If empty, checksum of (already) downloaded
      files is computed and can then be retrieved using
      `recorded_download_checksums` property.
    force_download: `bool`, default to False. If True, always [re]download.
    force_extraction: `bool`, default to False. If True, always [re]extract.
  """
  self._dataset_name = dataset_name
  self._checksums = checksums or {}
  # With no checksums given, record sizes/checksums of what gets downloaded.
  self._record_checksum_size = not checksums
  self._recorded_download_checksums = {}
  self._download_sizes = {}
  self._download_dir = os.path.expanduser(download_dir)
  default_extract_dir = os.path.join(download_dir, 'extracted')
  self._extract_dir = os.path.expanduser(extract_dir or default_extract_dir)
  # manual_dir is optional: only expand when provided, otherwise keep the
  # falsy value as-is.
  self._manual_dir = os.path.expanduser(manual_dir) if manual_dir else manual_dir
  tf.io.gfile.makedirs(self._download_dir)
  tf.io.gfile.makedirs(self._extract_dir)
  self._force_download = force_download
  self._force_extraction = force_extraction
  self._extractor = extractor.get_extractor()
  self._downloader = downloader.get_downloader()
def _downloader(self):
  """Returns the downloader instance, creating it lazily on first use."""
  dl = self.__downloader
  if not dl:
    dl = downloader.get_downloader()
    self.__downloader = dl
  return dl
def get_downloader(*args: Any, **kwargs: Any):
  """Forwards all arguments to `downloader.get_downloader` and returns its result."""
  return downloader.get_downloader(*args, **kwargs)