def unpack_file_url(link, location):
    source = url_to_path(link.url)
    content_type = mimetypes.guess_type(source)[0]
    if os.path.isdir(source):
        # delete the location since shutil will create it again :(
        if os.path.isdir(location):
            rmtree(location)
        copytree(source, location)
    else:
        unpack_file(source, location, content_type, link)
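A minimal stdlib-only sketch of the same "copy a local directory, otherwise unpack an archive" decision, with pip's url_to_path/unpack_file helpers replaced by urllib and shutil equivalents for illustration; the function name is hypothetical.

import os
import shutil
from urllib.parse import urlparse
from urllib.request import url2pathname


def unpack_local_url(url, location):
    # Convert a file:// URL to a filesystem path.
    source = url2pathname(urlparse(url).path)
    if os.path.isdir(source):
        # shutil.copytree requires the destination to not exist yet.
        if os.path.isdir(location):
            shutil.rmtree(location)
        shutil.copytree(source, location)
    else:
        # shutil.unpack_archive infers the archive format from the extension.
        shutil.unpack_archive(source, location)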
def test_wheel_version(tmpdir, data): future_wheel = "futurewheel-1.9-py2.py3-none-any.whl" broken_wheel = "brokenwheel-1.0-py2.py3-none-any.whl" future_version = (1, 9) unpack_file(data.packages.join(future_wheel), tmpdir + "future", None, None) unpack_file(data.packages.join(broken_wheel), tmpdir + "broken", None, None) assert wheel.wheel_version(tmpdir + "future") == future_version assert not wheel.wheel_version(tmpdir + "broken")
def test_wheel_version(tmpdir, data):
    future_wheel = 'futurewheel-1.9-py2.py3-none-any.whl'
    broken_wheel = 'brokenwheel-1.0-py2.py3-none-any.whl'
    future_version = (1, 9)

    unpack_file(data.packages.join(future_wheel), tmpdir + 'future', None, None)
    unpack_file(data.packages.join(broken_wheel), tmpdir + 'broken', None, None)

    assert wheel.wheel_version(tmpdir + 'future') == future_version
    assert not wheel.wheel_version(tmpdir + 'broken')
def unpack_http_url(link, location, download_cache, only_download):
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
            and os.path.exists(target_file)
            and os.path.exists(target_file + '.content-type')):
        fp = open(target_file + '.content-type')
        content_type = fp.read().strip()
        fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.md5_hash:
            _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_file_url(link, location, download_dir=None):

    link_path = url_to_path(link.url_without_fragment)
    already_downloaded = False

    # If it's a url to a local directory
    if os.path.isdir(link_path):
        if os.path.isdir(location):
            rmtree(location)
        shutil.copytree(link_path, location, symlinks=True)
        return

    # if link has a hash, let's confirm it matches
    if link.hash:
        link_path_hash = _get_hash_from_file(link_path, link)
        _check_hash(link_path_hash, link)

    # If a download dir is specified, is the file already there and valid?
    if download_dir:
        download_path = os.path.join(download_dir, link.filename)
        if os.path.exists(download_path):
            content_type = mimetypes.guess_type(download_path)[0]
            logger.notify('File was already downloaded %s' % download_path)
            if link.hash:
                download_hash = _get_hash_from_file(download_path, link)
                try:
                    _check_hash(download_hash, link)
                    already_downloaded = True
                except HashMismatch:
                    logger.warn(
                        'Previously-downloaded file %s has bad hash, '
                        're-downloading.' % link_path
                    )
                    os.unlink(download_path)
            else:
                already_downloaded = True

    if already_downloaded:
        from_path = download_path
    else:
        from_path = link_path

    content_type = mimetypes.guess_type(from_path)[0]

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(from_path, location, content_type, link)

    # a download dir is specified and not already downloaded
    if download_dir and not already_downloaded:
        _copy_file(from_path, download_dir, content_type, link)
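A hedged sketch of the hash verification used above, with hashlib standing in for pip's _get_hash_from_file/_check_hash helpers; hash_name and expected_hex are illustrative parameters, not pip API.

import hashlib


def file_hash_matches(path, hash_name, expected_hex, chunk_size=8192):
    # Stream the file through the requested digest and compare hex values.
    hasher = hashlib.new(hash_name)
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            hasher.update(chunk)
    return hasher.hexdigest() == expected_hex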
def prep(self, data, tmpdir): self.name = "sample" self.wheelpath = data.packages.join("sample-1.2.0-py2.py3-none-any.whl") self.req = pkg_resources.Requirement.parse("sample") self.src = os.path.join(tmpdir, "src") self.dest = os.path.join(tmpdir, "dest") unpack_file(self.wheelpath, self.src, None, None) self.scheme = { "scripts": os.path.join(self.dest, "bin"), "purelib": os.path.join(self.dest, "lib"), "data": os.path.join(self.dest, "data"), } self.src_dist_info = os.path.join(self.src, "sample-1.2.0.dist-info") self.dest_dist_info = os.path.join(self.scheme["purelib"], "sample-1.2.0.dist-info")
def test_unpack_wheel_no_flatten(self):
    from pip import util
    from tempfile import mkdtemp
    from shutil import rmtree

    filepath = '../data/packages/meta-1.0-py2.py3-none-any.whl'
    if not os.path.exists(filepath):
        pytest.skip("%s does not exist" % filepath)
    try:
        tmpdir = mkdtemp()
        util.unpack_file(filepath, tmpdir, 'application/zip', None)
        assert os.path.isdir(os.path.join(tmpdir, 'meta-1.0.dist-info'))
    finally:
        rmtree(tmpdir)
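Since a wheel is a plain zip archive, the "no flatten" expectation checked by the test above can also be verified with the stdlib alone; this helper and its default name_version are an assumed example, not part of pip's test suite.

import os
import tempfile
import zipfile


def wheel_keeps_dist_info(wheel_path, name_version="meta-1.0"):
    # Extract the wheel like any zip file and confirm the .dist-info
    # directory survives with its name intact.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(wheel_path) as zf:
            zf.extractall(tmpdir)
        return os.path.isdir(os.path.join(tmpdir, name_version + ".dist-info"))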
def prep(self, data, tmpdir):
    self.name = 'sample'
    self.wheelpath = data.packages.join(
        'sample-1.2.0-py2.py3-none-any.whl')
    self.req = pkg_resources.Requirement.parse('sample')
    self.src = os.path.join(tmpdir, 'src')
    self.dest = os.path.join(tmpdir, 'dest')
    unpack_file(self.wheelpath, self.src, None, None)
    self.scheme = {
        'scripts': os.path.join(self.dest, 'bin'),
        'purelib': os.path.join(self.dest, 'lib'),
        'data': os.path.join(self.dest, 'data'),
    }
    self.src_dist_info = os.path.join(self.src, 'sample-1.2.0.dist-info')
    self.dest_dist_info = os.path.join(self.scheme['purelib'],
                                       'sample-1.2.0.dist-info')
def prep(self, data, tmpdir):
    self.name = 'sample'
    self.wheelpath = data.packages.join(
        'sample-1.2.0-py2.py3-none-any.whl')
    self.req = pkg_resources.Requirement.parse('sample')
    self.src = os.path.join(tmpdir, 'src')
    self.dest = os.path.join(tmpdir, 'dest')
    unpack_file(self.wheelpath, self.src, None, None)
    self.scheme = {
        'scripts': os.path.join(self.dest, 'bin'),
        'purelib': os.path.join(self.dest, 'lib'),
        'data': os.path.join(self.dest, 'data'),
    }
    self.src_dist_info = os.path.join(
        self.src, 'sample-1.2.0.dist-info')
    self.dest_dist_info = os.path.join(
        self.scheme['purelib'], 'sample-1.2.0.dist-info')
def unpack_http_url(link, location, download_cache, only_download):
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
            and os.path.exists(target_file)
            and os.path.exists(target_file + '.content-type')):
        fp = open(target_file + '.content-type')
        content_type = fp.read().strip()
        fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.md5_hash:
            _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def get_archive_details(filename):
    if not sh.isfile(filename):
        raise IOError("Can not detail non-existent file %s" % (filename))

    # Check if we already got the details of this file previously
    cache_key = "f:%s:%s" % (sh.basename(filename), sh.getsize(filename))
    if cache_key in EGGS_DETAILED:
        return EGGS_DETAILED[cache_key]

    # Get pip to get us the egg-info.
    with utils.tempdir() as td:
        filename = sh.copy(filename, sh.joinpths(td, sh.basename(filename)))
        extract_to = sh.mkdir(sh.joinpths(td, 'build'))
        pip_util.unpack_file(filename, extract_to, content_type='', link='')
        details = get_directory_details(extract_to)

    EGGS_DETAILED[cache_key] = details
    return details
def unpack_http_url(link, location, download_cache, download_dir=None): temp_dir = tempfile.mkdtemp("-unpack", "pip-") target_url = link.url.split("#", 1)[0] target_file = None download_hash = None if download_cache: target_file = os.path.join(download_cache, urllib.quote(target_url, "")) if not os.path.isdir(download_cache): create_download_cache_folder(download_cache) already_downloaded = None if download_dir: already_downloaded = os.path.join(download_dir, link.filename) if not os.path.exists(already_downloaded): already_downloaded = None if target_file and os.path.exists(target_file) and os.path.exists(target_file + ".content-type"): fp = open(target_file + ".content-type") content_type = fp.read().strip() fp.close() if link.hash and link.hash_name: download_hash = _get_hash_from_file(target_file, link) temp_location = target_file logger.notify("Using download cache from %s" % target_file) elif already_downloaded: temp_location = already_downloaded content_type = mimetypes.guess_type(already_downloaded) if link.hash: download_hash = _get_hash_from_file(temp_location, link) logger.notify("File was already downloaded %s" % already_downloaded) else: resp = _get_response_from_url(target_url, link) content_type = resp.info()["content-type"] filename = link.filename # fallback # Have a look at the Content-Disposition header for a better guess content_disposition = resp.info().get("content-disposition") if content_disposition: type, params = cgi.parse_header(content_disposition) # We use ``or`` here because we don't want to use an "empty" value # from the filename param. filename = params.get("filename") or filename ext = splitext(filename)[1] if not ext: ext = mimetypes.guess_extension(content_type) if ext: filename += ext if not ext and link.url != geturl(resp): ext = os.path.splitext(geturl(resp))[1] if ext: filename += ext temp_location = os.path.join(temp_dir, filename) download_hash = _download_url(resp, link, temp_location) if link.hash and link.hash_name: _check_hash(download_hash, link) if download_dir and not already_downloaded: _copy_file(temp_location, download_dir, content_type, link) unpack_file(temp_location, location, content_type, link) if target_file and target_file != temp_location: cache_download(target_file, temp_location, content_type) if target_file is None and not already_downloaded: os.unlink(temp_location) os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None):
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded
    # copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info().get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)
    unpack_file(temp_location, location, content_type, link)
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_dir=None, session=None):
    if session is None:
        raise TypeError(
            "unpack_http_url() missing 1 required keyword argument: 'session'")

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]
    download_hash = None

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a
                #   compressible file and will leave the file alone and with
                #   an empty Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it
                #   again and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding We're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    if not already_downloaded:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
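A standalone sketch of the download step used above: request the identity encoding so the server does not re-compress an already-compressed archive, and stream the body to disk in chunks. The function name, URL, and destination path are placeholders, not pip API.

import requests


def download_archive(url, dest_path, chunk_size=8192):
    # Ask for the raw bytes (no gzip) and stream the response body.
    resp = requests.get(
        url,
        headers={"Accept-Encoding": "identity"},
        stream=True,
    )
    resp.raise_for_status()
    with open(dest_path, "wb") as fh:
        for chunk in resp.iter_content(chunk_size=chunk_size):
            if chunk:  # skip keep-alive chunks
                fh.write(chunk)
    return dest_path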
def unpack_http_url(link, location, download_cache, download_dir=None, session=None): if session is None: session = PipSession() temp_dir = tempfile.mkdtemp("-unpack", "pip-") temp_location = None target_url = link.url.split("#", 1)[0] already_cached = False cache_file = None cache_content_type_file = None download_hash = None # If a download cache is specified, is the file cached there? if download_cache: cache_file = os.path.join(download_cache, urllib.quote(target_url, "")) cache_content_type_file = cache_file + ".content-type" already_cached = os.path.exists(cache_file) and os.path.exists(cache_content_type_file) if not os.path.isdir(download_cache): create_download_cache_folder(download_cache) # If a download dir is specified, is the file already downloaded there? already_downloaded = None if download_dir: already_downloaded = os.path.join(download_dir, link.filename) if not os.path.exists(already_downloaded): already_downloaded = None # If already downloaded, does it's hash match? if already_downloaded: temp_location = already_downloaded content_type = mimetypes.guess_type(already_downloaded)[0] logger.notify("File was already downloaded %s" % already_downloaded) if link.hash: download_hash = _get_hash_from_file(temp_location, link) try: _check_hash(download_hash, link) except HashMismatch: logger.warn("Previously-downloaded file %s has bad hash, " "re-downloading." % temp_location) temp_location = None os.unlink(already_downloaded) already_downloaded = None # If not a valid download, let's confirm the cached file is valid if already_cached and not temp_location: with open(cache_content_type_file) as fp: content_type = fp.read().strip() temp_location = cache_file logger.notify("Using download cache from %s" % cache_file) if link.hash and link.hash_name: download_hash = _get_hash_from_file(cache_file, link) try: _check_hash(download_hash, link) except HashMismatch: logger.warn("Cached file %s has bad hash, " "re-downloading." % temp_location) temp_location = None os.unlink(cache_file) os.unlink(cache_content_type_file) already_cached = False # We don't have either a cached or a downloaded copy # let's download to a tmp dir if not temp_location: try: resp = session.get(target_url, stream=True) resp.raise_for_status() except requests.HTTPError as exc: logger.fatal("HTTP error %s while getting %s" % (exc.response.status_code, link)) raise content_type = resp.headers.get("content-type", "") filename = link.filename # fallback # Have a look at the Content-Disposition header for a better guess content_disposition = resp.headers.get("content-disposition") if content_disposition: type, params = cgi.parse_header(content_disposition) # We use ``or`` here because we don't want to use an "empty" value # from the filename param. filename = params.get("filename") or filename ext = splitext(filename)[1] if not ext: ext = mimetypes.guess_extension(content_type) if ext: filename += ext if not ext and link.url != resp.url: ext = os.path.splitext(resp.url)[1] if ext: filename += ext temp_location = os.path.join(temp_dir, filename) download_hash = _download_url(resp, link, temp_location) if link.hash and link.hash_name: _check_hash(download_hash, link) # a download dir is specified; let's copy the archive there if download_dir and not already_downloaded: _copy_file(temp_location, download_dir, content_type, link) # unpack the archive to the build dir location. 
even when only downloading # archives, they have to be unpacked to parse dependencies unpack_file(temp_location, location, content_type, link) # if using a download cache, cache it, if needed if cache_file and not already_cached: cache_download(cache_file, temp_location, content_type) if not (already_cached or already_downloaded): os.unlink(temp_location) os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(
            download_cache,
            urllib.quote(target_url, '')
        )
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file)
            and os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a
                #   compressible file and will leave the file alone and with
                #   an empty Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it
                #   again and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding We're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file)
            and os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded
    # copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    unpack_file(temp_location, location, content_type, link)

    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)