def test_mode_reuse_dataset(self):
        """Verify that REUSE_DATASET_IF_EXISTS reuses an existing cache entry.

        For the download step, REUSE_DATASET_IF_EXISTS is expected to behave
        the same way as REUSE_CACHE_IF_EXISTS: two managers that share a cache
        directory and a cache key must only run the processing function once.
        """
        reuse_mode = util.GenerateMode.REUSE_DATASET_IF_EXISTS
        cache_key = '/unittest/test_mode_reuse_dataset'

        # Two independent managers pointing at the same cache directory.
        first_manager = download_manager.DownloadManager(
            cache_dir=tf.test.get_temp_dir(), mode=reuse_mode)
        second_manager = download_manager.DownloadManager(
            cache_dir=tf.test.get_temp_dir(), mode=reuse_mode)

        processing_fn = tf.test.mock.Mock()

        # Run the identical processing through each manager in turn.
        first_output = first_manager.execute_and_cache(
            processing_fn, cache_key=cache_key)
        second_output = second_manager.execute_and_cache(
            processing_fn, cache_key=cache_key)

        # Both calls must resolve to the same output ...
        self.assertEqual(first_output, second_output)
        # ... and only the first one may actually invoke the processing.
        self.assertEqual(processing_fn.call_count, 1)
Esempio n. 2
0
 def _get_manager(self,
                  force_download=False,
                  force_extraction=False,
                  checksums=None,
                  dl_dir='/dl_dir',
                  extract_dir='/extract_dir'):
     """Return a DownloadManager with its downloader and extractor patched.

     Args:
       force_download: forwarded to the DownloadManager constructor.
       force_extraction: forwarded to the DownloadManager constructor.
       checksums: if truthy, assigned to the manager's `_sizes_checksums`.
       dl_dir: download directory given to the manager.
       extract_dir: extraction directory given to the manager.

     Returns:
       The configured manager. The started mocks are stored on the test as
       `self.downloader_download` and `self.extractor_extract`.
     """

     def _fake_download(url, tmpdir_path):
         # Serve the canned result registered by the test for this URL.
         return self.dl_results[url]

     def _fake_extract(path, method, dest):
         # Serve the canned result registered by the test for this path.
         return self.extract_results[path]

     manager = dm.DownloadManager(dataset_name='mnist',
                                  download_dir=dl_dir,
                                  extract_dir=extract_dir,
                                  manual_dir='/manual_dir',
                                  force_download=force_download,
                                  force_extraction=force_extraction)
     if checksums:
         manager._sizes_checksums = checksums

     # NOTE: the patches are started here and are not stopped by this helper.
     self.downloader_download = absltest.mock.patch.object(
         manager._downloader, 'download',
         side_effect=_fake_download).start()
     self.extractor_extract = absltest.mock.patch.object(
         manager._extractor, 'extract',
         side_effect=_fake_extract).start()
     return manager
    @classmethod
    def setUpClass(cls):
        """Create the fixtures shared by every test of the class.

        unittest invokes this hook on the class itself, so it has to be a
        classmethod; without the decorator the call fails for lack of `cls`.

        The following attributes are set on the class:
          dummy_dir: directory `<cache_dir>/dummy`, created here.
          dummy_filepath: path of `dummy.txt` inside `dummy_dir`.
          dummy_file_contents: the text written to `dummy_filepath`.
          input_dir: directory `<cache_dir>/to_extract`, created here.
          dl_manager: DownloadManager using `cache_dir` in
            REUSE_CACHE_IF_EXISTS mode.
        """
        cache_dir = tf.test.get_temp_dir()

        # Create a dummy file with known contents.
        dummy_dir = os.path.join(cache_dir, 'dummy')
        dummy_filepath = os.path.join(dummy_dir, 'dummy.txt')

        gfile.MakeDirs(dummy_dir)
        dummy_file_contents = 'hello world'
        with gfile.Open(dummy_filepath, 'w') as f:
            f.write(dummy_file_contents)

        # Directory meant to hold the compressed archives.
        input_dir = os.path.join(cache_dir, 'to_extract')
        gfile.MakeDirs(input_dir)

        dl_manager = download_manager.DownloadManager(
            cache_dir=cache_dir,
            mode=util.GenerateMode.REUSE_CACHE_IF_EXISTS,
        )

        # Publish the fixtures on the class so the tests can reach them.
        cls.dummy_dir = dummy_dir
        cls.dummy_filepath = dummy_filepath
        cls.dummy_file_contents = dummy_file_contents
        cls.input_dir = input_dir
        cls.dl_manager = dl_manager
 def _get_manager(self,
                  force_download=False,
                  force_extraction=False,
                  checksums=None,
                  dl_dir='/dl_dir',
                  extract_dir='/extract_dir'):
     """Return a DownloadManager with its downloader and extractor patched.

     Args:
       force_download: forwarded to the DownloadManager constructor.
       force_extraction: forwarded to the DownloadManager constructor.
       checksums: forwarded to the DownloadManager constructor.
       dl_dir: download directory given to the manager.
       extract_dir: extraction directory given to the manager.

     Returns:
       The configured manager. The started mocks are stored on the test as
       `self.downloader_download` and `self.extractor_extract`.
     """

     def _fake_download(resource, tmpdir_path):
         # Canned download results are keyed by the resource's URL.
         return self.dl_results[resource.url]

     def _fake_extract(resource, dest):
         # Canned extraction results are keyed by the resource's path.
         return self.extract_results[resource.path]

     manager = dm.DownloadManager(
         dataset_name='my_dataset',
         download_dir=dl_dir,
         extract_dir=extract_dir,
         manual_dir='/manual_dir',
         force_download=force_download,
         force_extraction=force_extraction,
         checksums=checksums,
     )

     # NOTE: the patches are started here and are not stopped by this helper.
     self.downloader_download = tf.compat.v1.test.mock.patch.object(
         manager._downloader, 'download',
         side_effect=_fake_download).start()
     self.extractor_extract = tf.compat.v1.test.mock.patch.object(
         manager._extractor, 'extract',
         side_effect=_fake_extract).start()
     return manager
    def test_mode_redownload(self):
        """Verify that FORCE_REDOWNLOAD ignores an existing cache entry."""
        redownload_mode = util.GenerateMode.FORCE_REDOWNLOAD
        cache_key = '/unittest/test_mode_redownload'

        # Two independent managers pointing at the same cache directory.
        first_manager = download_manager.DownloadManager(
            cache_dir=tf.test.get_temp_dir(), mode=redownload_mode)
        second_manager = download_manager.DownloadManager(
            cache_dir=tf.test.get_temp_dir(), mode=redownload_mode)

        processing_fn = tf.test.mock.Mock()

        # Run the identical processing through each manager in turn.
        first_manager.execute_and_cache(processing_fn, cache_key=cache_key)
        second_manager.execute_and_cache(processing_fn, cache_key=cache_key)

        # With the cache bypassed, each call must invoke the processing.
        self.assertEqual(processing_fn.call_count, 2)
Esempio n. 6
0
 def _get_manager(self,
                  register_checksums=True,
                  url_infos=None,
                  dl_dir='/dl_dir',
                  extract_dir='/extract_dir',
                  **kwargs):
     """Return a DownloadManager configured for the tests.

     Args:
       register_checksums: forwarded to the DownloadManager constructor.
       url_infos: if truthy, assigned to the manager's `_url_infos`.
       dl_dir: download directory given to the manager.
       extract_dir: extraction directory given to the manager.
       **kwargs: extra keyword arguments forwarded to the constructor.

     Returns:
       The configured manager.
     """
     manager = dm.DownloadManager(
         dataset_name='mnist',
         download_dir=dl_dir,
         extract_dir=extract_dir,
         manual_dir='/manual_dir',
         register_checksums=register_checksums,
         **kwargs)
     if not url_infos:
         return manager
     # Override the manager's URL infos with the ones supplied by the test.
     manager._url_infos = url_infos
     return manager
Esempio n. 7
0
 def _get_manager(self,
                  register_checksums=True,
                  url_infos=None,
                  dl_dir='/dl_dir',
                  extract_dir='/extract_dir',
                  manual_dir='/manual_dir',
                  **kwargs):
     """Return a DownloadManager configured for the tests.

     Args:
       register_checksums: forwarded to the DownloadManager constructor.
       url_infos: if truthy, assigned to the manager's `_url_infos`.
       dl_dir: download directory given to the manager.
       extract_dir: extraction directory given to the manager.
       manual_dir: manual directory given to the manager.
       **kwargs: extra keyword arguments forwarded to the constructor.

     Returns:
       The configured manager.
     """
     # Create the directory used for the checksums registration file below.
     tf.io.gfile.mkdir('/checksums')
     manager = dm.DownloadManager(dataset_name='mnist',
                                  download_dir=dl_dir,
                                  extract_dir=extract_dir,
                                  manual_dir=manual_dir,
                                  register_checksums=register_checksums,
                                  register_checksums_path='/checksums/checksums.tsv',
                                  **kwargs)
     if not url_infos:
         return manager
     # Override the manager's URL infos with the ones supplied by the test.
     manager._url_infos = url_infos
     return manager