Example no. 1
0
 def setUp(self):
     """Prepares a downloader with all network entry points mocked out."""
     super(DownloaderTest, self).setUp()
     self.addCleanup(mock.patch.stopall)
     self.downloader = downloader.get_downloader(10, hashlib.sha256)
     self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir())
     self.url = 'http://example.com/foo.tar.gz'
     self.resource = resource_lib.Resource(url=self.url)
     self.path = os.path.join(self.tmp_dir, 'foo.tar.gz')
     self.incomplete_path = '%s.incomplete' % self.path
     self.response = b'This \nis an \nawesome\n response!'
     self.resp_checksum = hashlib.sha256(self.response).hexdigest()
     self.cookies = {}

     def fake_response(*unused_args, **unused_kwargs):
         # Every fetch, whichever library performs it, yields the same
         # canned response object.
         return _FakeResponse(self.url, self.response, self.cookies)

     # Route both the requests-based and the urllib-based code paths
     # through the canned response.
     mock.patch.object(
         downloader.requests.Session, 'get', fake_response).start()
     mock.patch.object(
         downloader.urllib.request, 'urlopen', fake_response).start()
     self.downloader._pbar_url = mock.MagicMock()
     self.downloader._pbar_dl_size = mock.MagicMock()
Example no. 2
0
    def __init__(self,
                 dataset_name,
                 download_dir=None,
                 extract_dir=None,
                 manual_dir=None,
                 checksums=None,
                 force_download=False,
                 force_extraction=False):
        """Download manager constructor.

    Args:
      dataset_name: `str`, name of dataset this instance will be used for.
      download_dir: `str`, path to directory where downloads are stored.
      extract_dir: `str`, path to directory where artifacts are extracted.
        Defaults to `<download_dir>/extracted`.
      manual_dir: `str`, path to manually downloaded/extracted data directory.
        Optional; left as-is when not provided.
      checksums: `dict<str url, str sha256>`, url to sha256 of resource.
        Only URLs present are checked.
      force_download: `bool`, default to False. If True, always [re]download.
      force_extraction: `bool`, default to False. If True, always [re]extract.
    """
        self._dataset_name = dataset_name
        self._checksums = checksums or {}
        self._recorded_download_checksums = {}
        self._download_sizes = {}
        self._download_dir = os.path.expanduser(download_dir)
        # Fall back to a sibling 'extracted' directory instead of crashing on
        # os.path.expanduser(None) when extract_dir keeps its default.
        self._extract_dir = os.path.expanduser(
            extract_dir or os.path.join(download_dir, 'extracted'))
        # manual_dir is optional: only expand it when actually given, since
        # os.path.expanduser(None) raises TypeError.
        self._manual_dir = manual_dir and os.path.expanduser(manual_dir)
        tf.gfile.MakeDirs(self._download_dir)
        tf.gfile.MakeDirs(self._extract_dir)
        self._force_download = force_download
        self._force_extraction = force_extraction
        self._extractor = extractor.get_extractor()
        self._downloader = downloader.get_downloader()
Example no. 3
0
  def setUp(self):
    """Mocks HTTP (requests) and FTP (urlretrieve) access for the tests."""
    self.addCleanup(absltest.mock.patch.stopall)
    self.downloader = downloader.get_downloader(10, hashlib.sha256)
    self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir())
    self.url = 'http://example.com/foo.tar.gz'
    self.resource = resource_lib.Resource(url=self.url)
    self.path = os.path.join(self.tmp_dir, 'foo.tar.gz')
    self.incomplete_path = '%s.incomplete' % self.path
    self.response = b'This \nis an \nawesome\n response!'
    self.resp_checksum = hashlib.sha256(self.response).hexdigest()
    self.cookies = {}

    def fake_get(*unused_args, **unused_kwargs):
      # Any HTTP GET yields the same canned response object.
      return _FakeResponse(self.url, self.response, self.cookies)

    def write_fake_ftp_result(unused_url, filename):
      # urlretrieve-style downloads write straight into a file.
      with open(filename, 'wb') as result:
        result.write(self.response)

    absltest.mock.patch.object(
        downloader.requests.Session, 'get', fake_get).start()
    absltest.mock.patch.object(
        downloader.urllib.request, 'urlretrieve', write_fake_ftp_result,
    ).start()
    self.downloader._pbar_url = absltest.mock.MagicMock()
    self.downloader._pbar_dl_size = absltest.mock.MagicMock()
Example no. 4
0
 def setUp(self):
     """Mocks HTTP access and SSL context creation for the downloader tests."""
     self.addCleanup(absltest.mock.patch.stopall)
     self.downloader = downloader.get_downloader(10, hashlib.sha256)
     self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir())
     self.url = 'http://example.com/foo.tar.gz'
     self.resource = resource_lib.Resource(url=self.url)
     self.path = os.path.join(self.tmp_dir, 'foo.tar.gz')
     self.incomplete_path = '%s.incomplete' % self.path
     # Canned payload served by every mocked fetch below.
     self.response = b'This \nis an \nawesome\n response!'
     self.resp_checksum = hashlib.sha256(self.response).hexdigest()
     self.cookies = {}
     # Any HTTP GET through requests returns the canned response.
     absltest.mock.patch.object(
         downloader.requests.Session,
         'get',
         lambda *a, **kw: _FakeResponse(self.url, self.response, self.
                                        cookies),
     ).start()
     self.downloader._pbar_url = absltest.mock.MagicMock()
     self.downloader._pbar_dl_size = absltest.mock.MagicMock()
     # urllib-based fetches get the same canned response.
     absltest.mock.patch.object(
         downloader.urllib.request,
         'urlopen',
         lambda *a, **kw: _FakeResponse(self.url, self.response, self.
                                        cookies),
     ).start()
     # Seed the attributes first if this Python build lacks them, so the
     # mock.patch.object calls below find something to replace.
     if not hasattr(downloader.ssl, '_create_unverified_context'):
         # To not throw error for python<=2.7.8 while mocking SSLContext functions
         downloader.ssl.__dict__['_create_unverified_context'] = None
         downloader.ssl.__dict__['create_default_context'] = None
     # dummy ssl contexts returns for testing
     absltest.mock.patch.object(downloader.ssl,
                                '_create_unverified_context',
                                lambda *a, **kw: 'skip_ssl').start()
     absltest.mock.patch.object(downloader.ssl, 'create_default_context',
                                lambda *a, **kw: 'use_ssl').start()
Example no. 5
0
    def __init__(self,
                 download_dir,
                 extract_dir=None,
                 manual_dir=None,
                 manual_dir_instructions=None,
                 dataset_name=None,
                 force_download=False,
                 force_extraction=False,
                 register_checksums=False):
        """Constructs the download manager.

    Args:
      download_dir: `str`, path to directory where downloads are stored.
      extract_dir: `str`, path to directory where artifacts are extracted.
      manual_dir: `str`, path to manually downloaded/extracted data directory.
      manual_dir_instructions: `str`, human readable instructions on how to
                         prepare contents of the manual_dir for this dataset.
      dataset_name: `str`, name of dataset this instance will be used for. If
        provided, downloads will contain which datasets they were used for.
      force_download: `bool`, default to False. If True, always [re]download.
      force_extraction: `bool`, default to False. If True, always [re]extract.
      register_checksums: `bool`, default to False. If True, dl checksums aren't
        checked, but stored into file.
    """
        self._dataset_name = dataset_name
        self._force_download = force_download
        self._force_extraction = force_extraction
        self._register_checksums = register_checksums
        self._manual_dir_instructions = manual_dir_instructions

        # Resolve directories; extraction defaults to a sibling of downloads.
        default_extract = os.path.join(download_dir, 'extracted')
        self._download_dir = os.path.expanduser(download_dir)
        self._extract_dir = os.path.expanduser(extract_dir or default_extract)
        if manual_dir:
            self._manual_dir = os.path.expanduser(manual_dir)
        else:
            self._manual_dir = manual_dir
        tf.io.gfile.makedirs(self._download_dir)
        tf.io.gfile.makedirs(self._extract_dir)

        self._extractor = extractor.get_extractor()
        self._downloader = downloader.get_downloader()
        # All known URLs: {url: (size, checksum)}
        self._sizes_checksums = checksums.get_all_sizes_checksums()
        # To record what is being used: {url: (size, checksum)}
        self._recorded_sizes_checksums = {}
Example no. 6
0
    def __init__(self,
                 download_dir,
                 extract_dir=None,
                 manual_dir=None,
                 dataset_name=None,
                 checksums=None,
                 force_download=False,
                 force_extraction=False):
        """Constructs the download manager.

    Args:
      download_dir: `str`, path to directory where downloads are stored.
      extract_dir: `str`, path to directory where artifacts are extracted.
      manual_dir: `str`, path to manually downloaded/extracted data directory.
      dataset_name: `str`, name of dataset this instance will be used for. If
        provided, downloads will contain which datasets they were used for.
      checksums: `dict<str url, str sha256>`, url to sha256 of resource.
        Only URLs present are checked.
        If empty, checksum of (already) downloaded files is computed and can
        then be retrieved using `recorded_download_checksums` property.
      force_download: `bool`, default to False. If True, always [re]download.
      force_extraction: `bool`, default to False. If True, always [re]extract.
    """
        self._dataset_name = dataset_name
        self._force_download = force_download
        self._force_extraction = force_extraction

        # With no declared checksums, record what we observe instead of
        # verifying against a known set.
        self._checksums = checksums or {}
        self._record_checksum_size = not checksums
        self._recorded_download_checksums = {}
        self._download_sizes = {}

        # Resolve directories; extraction defaults to a sibling of downloads.
        default_extract = os.path.join(download_dir, 'extracted')
        self._download_dir = os.path.expanduser(download_dir)
        self._extract_dir = os.path.expanduser(extract_dir or default_extract)
        self._manual_dir = manual_dir and os.path.expanduser(manual_dir)
        tf.io.gfile.makedirs(self._download_dir)
        tf.io.gfile.makedirs(self._extract_dir)

        self._extractor = extractor.get_extractor()
        self._downloader = downloader.get_downloader()
Example no. 7
0
 def _downloader(self):
     """Returns the shared downloader, creating it lazily on first use."""
     dl = self.__downloader
     if not dl:
         dl = downloader.get_downloader()
         self.__downloader = dl
     return dl
Example no. 8
0
def get_downloader(*args: Any, **kwargs: Any):
    """Module-level pass-through to `downloader.get_downloader`."""
    factory = downloader.get_downloader
    return factory(*args, **kwargs)