def test_unpack_http_url_bad_downloaded_checksum(mock_get_response, mock_unpack_file):
    """Re-download when an already-downloaded file fails its checksum.

    A stale ``somepackage.tgz`` with wrong contents sits in the download
    directory; ``unpack_http_url`` must fetch the archive again and
    overwrite the stale copy.
    """
    base_url = 'http://www.example.com/somepackage.tgz'
    contents = b('downloaded')
    download_hash = hashlib.new('sha1', contents)
    link = Link(base_url + '#sha1=' + download_hash.hexdigest())

    resp = mock_get_response.return_value = MockResponse(contents)
    resp.info = lambda: {'content-type': 'application/x-tar'}
    resp.geturl = lambda: base_url

    download_dir = mkdtemp()
    try:
        stale_file = os.path.join(download_dir, 'somepackage.tgz')
        _write_file(stale_file, 'some contents')

        unpack_http_url(link, 'location',
                        download_cache=None,
                        download_dir=download_dir)

        # despite existence of downloaded file with bad hash, downloaded again
        mock_get_response.assert_called_once_with(base_url, link)
        # cached file is replaced with newly downloaded file
        with open(stale_file) as fh:
            assert fh.read() == 'downloaded'
    finally:
        rmtree(download_dir)
def test_unpack_http_url_with_urllib_response_without_content_type(data):
    """Download and unpack files even when no Content-Type header exists."""
    _real_session = PipSession()

    def _fake_session_get(*args, **kwargs):
        # Delegate to a real session, then strip the header before returning
        # so unpack_http_url sees a response without Content-Type.
        resp = _real_session.get(*args, **kwargs)
        del resp.headers["Content-Type"]
        return resp

    session = Mock()
    session.get = _fake_session_get

    link = Link(path_to_url(data.packages.join("simple-1.0.tar.gz")))
    temp_dir = mkdtemp()
    try:
        unpack_http_url(
            link,
            temp_dir,
            download_dir=None,
            session=session,
        )
        assert set(os.listdir(temp_dir)) == set([
            'PKG-INFO', 'setup.cfg', 'setup.py', 'simple', 'simple.egg-info'
        ])
    finally:
        rmtree(temp_dir)
def unpack_url(self, link, location, download_dir=None, only_download=False):
    """Unpack *link* into *location*, dispatching on the URL scheme.

    :param download_dir: directory where downloaded archives are kept;
        falls back to ``self.download_dir`` when None.
    :param only_download: when True, VCS links are checked out into the
        download directory instead of *location*, and non-VCS links mark
        the build location with a delete-marker file.
    """
    if download_dir is None:
        download_dir = self.download_dir

    # Non-editable VCS urls are handled entirely by unpack_vcs_link.
    if is_vcs_url(link):
        target = download_dir if only_download else location
        unpack_vcs_link(link, target, only_download)
        return

    # File urls vs. http urls; both share the delete-marker behaviour.
    if is_file_url(link):
        unpack_file_url(link, location, download_dir)
    else:
        unpack_http_url(
            link,
            location,
            self.download_cache,
            download_dir,
            self.session,
        )
    if only_download:
        write_delete_marker_file(location)
def test_unpack_http_url_with_urllib_response_without_content_type():
    """
    It should download and unpack files even if no Content-Type header exists
    """
    def _get_response_from_url_mock(*args, **kw):
        # Call through to the real implementation, then drop the
        # content-type entry so unpack_http_url must cope without it.
        resp = _get_response_from_url_original(*args, **kw)
        del resp.info()['content-type']
        return resp

    # FIX: the context-manager value was bound to an unused name
    # (`as mocked`); the binding has been removed.
    with patch('pip.download._get_response_from_url',
               _get_response_from_url_mock):
        uri = path_to_url2(
            os.path.join(tests_data, 'packages', 'simple-1.0.tar.gz'))
        link = Link(uri)
        temp_dir = mkdtemp()
        try:
            unpack_http_url(link, temp_dir,
                            download_cache=None,
                            download_dir=None)
            assert set(os.listdir(temp_dir)) == set([
                'PKG-INFO', 'setup.cfg', 'setup.py', 'simple',
                'simple.egg-info'
            ])
        finally:
            rmtree(temp_dir)
def test_unpack_http_url_bad_cache_checksum(mock_get_response, mock_unpack_file):
    """A cached download whose hash does not match must be fetched again."""
    base_url = 'http://www.example.com/somepackage.tgz'
    contents = b('downloaded')
    download_hash = hashlib.new('sha1', contents)
    link = Link(base_url + '#sha1=' + download_hash.hexdigest())

    resp = mock_get_response.return_value = MockResponse(contents)
    resp.info = lambda: {'content-type': 'application/x-tar'}
    resp.geturl = lambda: base_url

    cache_dir = mkdtemp()
    try:
        cached_path = os.path.join(cache_dir, urllib.quote(base_url, ''))
        _write_file(cached_path, 'some contents')
        _write_file(cached_path + '.content-type', 'application/x-tar')

        unpack_http_url(link, 'location', download_cache=cache_dir)

        # despite existence of cached file with bad hash, downloaded again
        mock_get_response.assert_called_once_with(base_url, link)
        # cached file is replaced with newly downloaded file
        with open(cached_path) as fh:
            assert fh.read() == 'downloaded'
    finally:
        rmtree(cache_dir)
def fetch(self, requirement):
    """
    Fetch a requirement and unpack it into a temporary directory.

    Returns the temporary directory path on success, or None when the
    requirement cannot be found or every retry fails.  On success the
    caller owns (and must clean up) the returned directory; on failure
    the directory is removed here.

    FIX: previously the temporary directory leaked on the
    DistributionNotFound path and when all retries were exhausted.
    """
    import shutil

    download_tmp = tempfile.mkdtemp()
    for _ in range(Fetcher.MAX_RETRIES):
        ir = InstallRequirement.from_line(str(requirement))
        try:
            try:
                ir_link = self._finder.find_requirement(ir, upgrade=True)
            except DistributionNotFound:
                shutil.rmtree(download_tmp, ignore_errors=True)
                return None
            unpack_http_url(ir_link, download_tmp, self._cache)
            return download_tmp
        # This is distinctly below average because it means I need to know
        # about the underlying url fetching implementation. TODO: consider a
        # bespoke fetcher implementation, also consider exponential backoff
        # here.
        except HTTPError as e:
            print('Got HTTPError: %s..retrying' % e)
            time.sleep(0.1)
            self._reinit()
    # All retries exhausted: clean up before giving up.
    shutil.rmtree(download_tmp, ignore_errors=True)
    return None
def test_unpack_http_url_bad_downloaded_checksum(mock_unpack_file):
    """An already-downloaded file with a bad checksum triggers a re-download."""
    base_url = "http://www.example.com/somepackage.tgz"
    contents = b"downloaded"
    download_hash = hashlib.new("sha1", contents)
    link = Link(base_url + "#sha1=" + download_hash.hexdigest())

    session = Mock()
    session.get = Mock()
    resp = session.get.return_value = MockResponse(contents)
    resp.headers = {"content-type": "application/x-tar"}
    resp.url = base_url

    download_dir = mkdtemp()
    try:
        stale_file = os.path.join(download_dir, "somepackage.tgz")
        _write_file(stale_file, "some contents")

        unpack_http_url(link, "location",
                        download_dir=download_dir,
                        session=session)

        # despite existence of downloaded file with bad hash, downloaded again
        session.get.assert_called_once_with(
            "http://www.example.com/somepackage.tgz",
            headers={"Accept-Encoding": "identity"},
            stream=True,
        )
        # cached file is replaced with newly downloaded file
        with open(stale_file) as fh:
            assert fh.read() == "downloaded"
    finally:
        rmtree(download_dir)
def test_unpack_http_url_with_urllib_response_without_content_type():
    """
    It should download and unpack files even if no Content-Type header exists
    """
    def _get_response_from_url_mock(*args, **kw):
        # Call through to the real implementation, then drop the
        # content-type entry so unpack_http_url must cope without it.
        resp = _get_response_from_url_original(*args, **kw)
        del resp.info()['content-type']
        return resp

    # FIX: the context-manager value was bound to an unused name
    # (`as mocked`); the binding has been removed.
    with patch('pip.download._get_response_from_url',
               _get_response_from_url_mock):
        uri = path_to_url2(
            os.path.join(tests_data, 'packages', 'simple-1.0.tar.gz'))
        link = Link(uri)
        temp_dir = mkdtemp()
        try:
            unpack_http_url(link, temp_dir,
                            download_cache=None,
                            download_dir=None)
            assert set(os.listdir(temp_dir)) == set([
                'PKG-INFO', 'setup.cfg', 'setup.py', 'simple',
                'simple.egg-info'
            ])
        finally:
            rmtree(temp_dir)
def download_url(self, link, location=None):
    """Fetch *link* into *location* (defaults to ``self.build_path``),
    dispatching on the URL scheme and keeping an on-disk cache for
    plain HTTP downloads under ``<base_path>/.cache``.
    """
    cache_dir = join(self.base_path, '.cache')
    self.mkdir(cache_dir)
    location = location or self.build_path

    if is_vcs_url(link):
        return unpack_vcs_link(link, location, only_download=False)
    if is_file_url(link):
        return unpack_file_url(link, location)
    return unpack_http_url(link, location, cache_dir, False)
def test_unpack_http_url_bad_downloaded_checksum(mock_unpack_file):
    """A pre-existing download with a mismatching hash is fetched again."""
    base_url = 'http://www.example.com/somepackage.tgz'
    contents = b'downloaded'
    download_hash = hashlib.new('sha1', contents)
    link = Link(base_url + '#sha1=' + download_hash.hexdigest())

    session = Mock()
    session.get = Mock()
    resp = session.get.return_value = MockResponse(contents)
    resp.headers = {'content-type': 'application/x-tar'}
    resp.url = base_url

    download_dir = mkdtemp()
    try:
        stale_file = os.path.join(download_dir, 'somepackage.tgz')
        _write_file(stale_file, 'some contents')

        unpack_http_url(
            link,
            'location',
            download_dir=download_dir,
            session=session,
            hashes=Hashes({'sha1': [download_hash.hexdigest()]})
        )

        # despite existence of downloaded file with bad hash, downloaded again
        session.get.assert_called_once_with(
            'http://www.example.com/somepackage.tgz',
            headers={"Accept-Encoding": "identity"},
            stream=True,
        )
        # cached file is replaced with newly downloaded file
        with open(stale_file) as fh:
            assert fh.read() == 'downloaded'
    finally:
        rmtree(download_dir)
def test_unpack_http_url_bad_cache_checksum(mock_unpack_file):
    """A cached archive whose hash does not match is downloaded again."""
    base_url = 'http://www.example.com/somepackage.tgz'
    contents = b('downloaded')
    download_hash = hashlib.new('sha1', contents)
    link = Link(base_url + '#sha1=' + download_hash.hexdigest())

    session = Mock()
    session.get = Mock()
    resp = session.get.return_value = MockResponse(contents)
    resp.headers = {'content-type': 'application/x-tar'}
    resp.url = base_url

    cache_dir = mkdtemp()
    try:
        cached_path = os.path.join(cache_dir, urllib.quote(base_url, ''))
        _write_file(cached_path, 'some contents')
        _write_file(cached_path + '.content-type', 'application/x-tar')

        unpack_http_url(
            link,
            'location',
            download_cache=cache_dir,
            session=session,
        )

        # despite existence of cached file with bad hash, downloaded again
        session.get.assert_called_once_with(
            "http://www.example.com/somepackage.tgz",
            stream=True,
        )
        # cached file is replaced with newly downloaded file
        with open(cached_path) as fh:
            assert fh.read() == 'downloaded'
    finally:
        rmtree(cache_dir)