def test_mode_reuse_dataset(self):
    """Verify the cache is shared in REUSE_DATASET_IF_EXISTS mode.

    Two separate managers, both created with the temp directory as cache
    dir, run the same processing under the same cache key. The test expects
    identical outputs and a single invocation of the process function.
    """
    # For the download step, REUSE_DATASET_IF_EXISTS is expected to behave
    # the same way as REUSE_CACHE_IF_EXISTS.
    managers = [
        download_manager.DownloadManager(
            cache_dir=tf.test.get_temp_dir(),
            mode=util.GenerateMode.REUSE_DATASET_IF_EXISTS,
        )
        for _ in range(2)
    ]

    process_fn = tf.test.mock.Mock()
    cache_key = '/unittest/test_mode_reuse_dataset'

    # Run the very same processing once through each manager.
    first_output, second_output = [
        manager.execute_and_cache(process_fn, cache_key=cache_key)
        for manager in managers
    ]

    # Both runs must resolve to the same output.
    self.assertEqual(first_output, second_output)
    # The second run must not have invoked the process function again.
    self.assertEqual(process_fn.call_count, 1)
def _get_manager(self, force_download=False, force_extraction=False,
                 checksums=None, dl_dir='/dl_dir',
                 extract_dir='/extract_dir'):
    """Return an 'mnist' DownloadManager with mocked download and extract.

    Args:
      force_download: forwarded to the DownloadManager constructor.
      force_extraction: forwarded to the DownloadManager constructor.
      checksums: when truthy, assigned to the manager's `_sizes_checksums`.
      dl_dir: download directory handed to the manager.
      extract_dir: extraction directory handed to the manager.

    Returns:
      The manager. The started mocks are stored on the test instance as
      `self.downloader_download` and `self.extractor_extract`.
    """
    def _fake_download(url, tmpdir_path):
        # Canned result looked up by url at call time.
        return self.dl_results[url]

    def _fake_extract(path, method, dest):
        # Canned result looked up by path at call time.
        return self.extract_results[path]

    manager = dm.DownloadManager(
        dataset_name='mnist',
        download_dir=dl_dir,
        extract_dir=extract_dir,
        manual_dir='/manual_dir',
        force_download=force_download,
        force_extraction=force_extraction,
    )
    if checksums:
        manager._sizes_checksums = checksums

    # NOTE(review): both patchers are started but never stopped in this
    # method; presumably cleanup is registered elsewhere -- confirm.
    download_patcher = absltest.mock.patch.object(
        manager._downloader, 'download', side_effect=_fake_download)
    self.downloader_download = download_patcher.start()

    extract_patcher = absltest.mock.patch.object(
        manager._extractor, 'extract', side_effect=_fake_extract)
    self.extractor_extract = extract_patcher.start()

    return manager
def setUpClass(cls):
    """Create the fixtures shared by the tests of this class.

    Writes a small dummy text file under the temp directory, creates the
    'to_extract' input directory, builds a DownloadManager in
    REUSE_CACHE_IF_EXISTS mode, and exposes all of them as class attributes.
    """
    root = tf.test.get_temp_dir()

    # Locations of the dummy file and of the archive input directory.
    dummy_folder = os.path.join(root, 'dummy')
    dummy_path = os.path.join(dummy_folder, 'dummy.txt')
    # Directory intended to hold the compressed archives.
    archives_folder = os.path.join(root, 'to_extract')

    # Create the dummy file.
    contents = 'hello world'
    gfile.MakeDirs(dummy_folder)
    with gfile.Open(dummy_path, 'w') as dummy_file:
        dummy_file.write(contents)

    gfile.MakeDirs(archives_folder)

    manager = download_manager.DownloadManager(
        cache_dir=root,
        mode=util.GenerateMode.REUSE_CACHE_IF_EXISTS,
    )

    # Publish everything only once the whole setup succeeded.
    cls.dummy_dir = dummy_folder
    cls.dummy_filepath = dummy_path
    cls.dummy_file_contents = contents
    cls.input_dir = archives_folder
    cls.dl_manager = manager
def _get_manager(self, force_download=False, force_extraction=False,
                 checksums=None, dl_dir='/dl_dir',
                 extract_dir='/extract_dir'):
    """Return a 'my_dataset' DownloadManager with mocked download/extract.

    Args:
      force_download: forwarded to the DownloadManager constructor.
      force_extraction: forwarded to the DownloadManager constructor.
      checksums: forwarded to the DownloadManager constructor.
      dl_dir: download directory handed to the manager.
      extract_dir: extraction directory handed to the manager.

    Returns:
      The manager. The started mocks are stored on the test instance as
      `self.downloader_download` and `self.extractor_extract`.
    """
    def _fake_download(resource, tmpdir_path):
        # Canned result looked up by the resource's url at call time.
        return self.dl_results[resource.url]

    def _fake_extract(resource, dest):
        # Canned result looked up by the resource's path at call time.
        return self.extract_results[resource.path]

    manager = dm.DownloadManager(
        dataset_name='my_dataset',
        download_dir=dl_dir,
        extract_dir=extract_dir,
        manual_dir='/manual_dir',
        force_download=force_download,
        force_extraction=force_extraction,
        checksums=checksums,
    )

    # NOTE(review): both patchers are started but never stopped in this
    # method; presumably cleanup is registered elsewhere -- confirm.
    download_patcher = tf.compat.v1.test.mock.patch.object(
        manager._downloader, 'download', side_effect=_fake_download)
    self.downloader_download = download_patcher.start()

    extract_patcher = tf.compat.v1.test.mock.patch.object(
        manager._extractor, 'extract', side_effect=_fake_extract)
    self.extractor_extract = extract_patcher.start()

    return manager
def test_mode_redownload(self):
    """Verify the cache is NOT reused in FORCE_REDOWNLOAD mode.

    Two separate managers, both created with the temp directory as cache
    dir, run the same processing under the same cache key. The test expects
    the process function to be invoked once per run.
    """
    managers = [
        download_manager.DownloadManager(
            cache_dir=tf.test.get_temp_dir(),
            mode=util.GenerateMode.FORCE_REDOWNLOAD,
        )
        for _ in range(2)
    ]

    process_fn = tf.test.mock.Mock()
    cache_key = '/unittest/test_mode_redownload'

    # Run the very same processing once through each manager.
    for manager in managers:
        manager.execute_and_cache(process_fn, cache_key=cache_key)

    # Each run must have invoked the process function.
    self.assertEqual(process_fn.call_count, 2)
def _get_manager(self, register_checksums=True, url_infos=None,
                 dl_dir='/dl_dir', extract_dir='/extract_dir', **kwargs):
    """Return a DownloadManager for dataset 'mnist'.

    Args:
      register_checksums: forwarded to the DownloadManager constructor.
      url_infos: when truthy, assigned to the manager's `_url_infos`.
      dl_dir: download directory handed to the manager.
      extract_dir: extraction directory handed to the manager.
      **kwargs: extra keyword arguments forwarded to the constructor.

    Returns:
      The constructed manager.
    """
    manager = dm.DownloadManager(
        dataset_name='mnist',
        download_dir=dl_dir,
        extract_dir=extract_dir,
        manual_dir='/manual_dir',
        register_checksums=register_checksums,
        **kwargs)

    # Nothing to override: hand the manager back untouched.
    if not url_infos:
        return manager

    manager._url_infos = url_infos
    return manager
def _get_manager(self, register_checksums=True, url_infos=None,
                 dl_dir='/dl_dir', extract_dir='/extract_dir',
                 manual_dir='/manual_dir', **kwargs):
    """Return a DownloadManager for dataset 'mnist'.

    The '/checksums' directory is created first, and the manager is given
    '/checksums/checksums.tsv' as its `register_checksums_path`.

    Args:
      register_checksums: forwarded to the DownloadManager constructor.
      url_infos: when truthy, assigned to the manager's `_url_infos`.
      dl_dir: download directory handed to the manager.
      extract_dir: extraction directory handed to the manager.
      manual_dir: manual directory handed to the manager.
      **kwargs: extra keyword arguments forwarded to the constructor.

    Returns:
      The constructed manager.
    """
    # Create the directory of the checksums file before building the manager.
    tf.io.gfile.mkdir('/checksums')

    manager = dm.DownloadManager(
        dataset_name='mnist',
        download_dir=dl_dir,
        extract_dir=extract_dir,
        manual_dir=manual_dir,
        register_checksums=register_checksums,
        register_checksums_path='/checksums/checksums.tsv',
        **kwargs)

    # Nothing to override: hand the manager back untouched.
    if not url_infos:
        return manager

    manager._url_infos = url_infos
    return manager