def metadata(distribution):
    """Extract (name, version) from a distribution archive's PKG-INFO.

    Falls back to parsing ``<name>-<version>`` out of the archive's file
    name when the archive exposes no PKG-INFO.
    """
    name, ext = splitext(distribution)
    # Dispatch on the archive extension to the matching reader class.
    handlers = {
        '.zip': ZipArchive,
        '.tar.gz': TarArchive,
        '.tar.bz2': TarArchive,
        '.tar.tgz': TarArchive,
        '.tar': TarArchive,
        '.tgz': TarArchive,
    }
    with closing(handlers[ext](distribution)) as arc:
        try:
            pkg_info = arc.pkg_info()
        except AttributeError:
            # Do it the dumb way -- extract name and version from filename
            base, ext = splitext(arc.file_name.rsplit('/')[-1])
            name, version = base.rsplit('-', 1)
        else:
            name, version = None, None
            for line in pkg_info.split('\n'):
                if line.startswith('Name: '):
                    name = line.split()[1]
                elif line.startswith('Version: '):
                    version = line.split()[1]
                if name and version:
                    break
    return name, version
def is_archive_file(name):
    """Return True if `name` is considered an archive file."""
    archives = ('.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.pybundle')
    ext = splitext(name)[1].lower()
    # The membership test is already a bool; no if/return True/False needed.
    return ext in archives
def is_archive_file(name):
    """Return True if `name` is considered an archive file."""
    archives = (".zip", ".tar.gz", ".tar.bz2", ".tgz", ".tar", ".pybundle")
    ext = splitext(name)[1].lower()
    # The membership test is already a bool; no if/return True/False needed.
    return ext in archives
def get_requirement_from_url(url):
    """Get a requirement from the URL, if possible. This looks for #egg in the URL"""
    link = Link(url)
    # Prefer the explicit #egg fragment; otherwise derive it from the
    # file name (extension stripped).
    egg_info = link.egg_fragment or splitext(link.filename)[0]
    return package_to_requirement(egg_info)
def parse_package_and_version(path):
    """
    Parse the package name and version number from a path.

    The split point is the first dash-separated component that starts
    with a digit; the name part is lower-cased and joined with
    underscores, the version part is re-joined with dashes.

    :param path: file name or path such as ``Foo-Bar-1.2.3.zip``
    :returns: ``(name, version)``, or ``(None, None)`` when no version
        component can be identified
    """
    filename = splitext(path)[0]
    if '-' not in filename:
        return None, None
    path_components = filename.split('-')
    for i, comp in enumerate(path_components):
        # Guard against empty components (e.g. "pkg--1.0"): the original
        # ``comp[0]`` raised IndexError on an empty string.
        if comp and comp[0].isdigit():
            return ('_'.join(path_components[:i]).lower(),
                    '-'.join(path_components[i:]))
    return None, None
def _find_cached_match(spec):
    """Resolve `spec` to a (version, source) pair, consulting the link cache
    before falling back to a PyPI lookup via the finder."""
    # NOTE(review): this reads ``self`` but takes no ``self`` parameter --
    # it looks like a method (or a closure over ``self``); confirm against
    # the enclosing scope.
    #if spec.is_pinned:
    ## If this is a pinned spec, we can take a shortcut: if it is
    ## found in the dependency cache, we can safely assume it has
    ## been downloaded before, and thus must exist. We can know
    ## this without every reaching out to PyPI and avoid the
    ## network overhead.
    #name, version = spec.name, first(spec.preds)[1]
    #if (name, version) in self._dep_cache:
    #source = 'dependency cache'
    #return version, source
    version = None
    overrides = self.overrides.get(spec.name)
    ## Try the link cache, and otherwise, try PyPI
    if (spec.no_extra, overrides) in self._link_cache:
        link, version = self._link_cache[(spec.no_extra, overrides)]
        source = 'link cache'
    else:
        try:
            # NOTE(review): ``specline`` is not defined in this function --
            # presumably it should be derived from ``spec``; verify.
            requirement = InstallRequirement.from_line(specline)
            link = self.finder.find_requirement(requirement, False)
        except DistributionNotFound:
            # Retry, this time allowing pre-release candidates.
            requirement = InstallRequirement.from_line(
                specline, prereleases=True)
            link = self.finder.find_requirement(requirement, False)
        link, version = self._link_hook(overrides, spec, link)
        # Hack to make pickle work
        link.comes_from = None
        source = 'PyPI'
        if link.egg_fragment:
            # Take the version from the #egg= fragment, then re-link with
            # a hash fragment instead.
            version = link.egg_fragment.rsplit('-', 1)[1]
            link = Link(
                link.url_without_fragment + "#%s=%s" % self.get_hash(link)
            )
        elif not version:
            _, version = splitext(link.filename)[0].rsplit('-', 1)
        # It's more reliable to get version from pinned spec then filename
        if spec.is_pinned:
            version = spec.pinned
        assert version, "Version must be set!"
        self._link_cache[(spec.no_extra, overrides)] = (link, version)
        # Take this moment to smartly insert the pinned variant of this
        # spec into the link_cache, too
        pinned_spec = Spec.from_pinned(spec.name, version)
        self._link_cache[pinned_spec.fullname] = (link, version)
    return version, source
def _find_cached_match(spec):
    """Resolve `spec` to a (version, source) pair, consulting the link cache
    before falling back to a PyPI lookup via the finder."""
    # NOTE(review): reads ``self`` without a ``self`` parameter -- likely a
    # method or closure over ``self``; confirm against the enclosing scope.
    #if spec.is_pinned:
    ## If this is a pinned spec, we can take a shortcut: if it is
    ## found in the dependency cache, we can safely assume it has
    ## been downloaded before, and thus must exist. We can know
    ## this without every reaching out to PyPI and avoid the
    ## network overhead.
    #name, version = spec.name, first(spec.preds)[1]
    #if (name, version) in self._dep_cache:
    #source = 'dependency cache'
    #return version, source
    version = None
    overrides = self.overrides.get(spec.name)
    ## Try the link cache, and otherwise, try PyPI
    if (spec.no_extra, overrides) in self._link_cache:
        link, version = self._link_cache[(spec.no_extra, overrides)]
        source = 'link cache'
    else:
        try:
            # NOTE(review): ``specline`` is undefined in this function --
            # presumably derived from ``spec``; verify.
            requirement = InstallRequirement.from_line(specline)
            link = self.finder.find_requirement(requirement, False)
        except DistributionNotFound:
            # Retry allowing pre-release candidates.
            requirement = InstallRequirement.from_line(
                specline, prereleases=True)
            link = self.finder.find_requirement(requirement, False)
        link, version = self._link_hook(overrides, spec, link)
        # Hack to make pickle work
        link.comes_from = None
        source = 'PyPI'
        if link.egg_fragment:
            # Version from the #egg= fragment; re-link with a hash fragment.
            version = link.egg_fragment.rsplit('-', 1)[1]
            link = Link(link.url_without_fragment +
                        "#%s=%s" % self.get_hash(link))
        elif not version:
            _, version = splitext(link.filename)[0].rsplit('-', 1)
        # It's more reliable to get version from pinned spec then filename
        if spec.is_pinned:
            version = spec.pinned
        assert version, "Version must be set!"
        self._link_cache[(spec.no_extra, overrides)] = (link, version)
        # Take this moment to smartly insert the pinned variant of this
        # spec into the link_cache, too
        pinned_spec = Spec.from_pinned(spec.name, version)
        self._link_cache[pinned_spec.fullname] = (link, version)
    return version, source
def unpack_http_url(link, location, download_cache, only_download):
    """Download ``link`` over HTTP (or reuse the download cache) and unpack
    it into ``location``; with ``only_download`` the file is copied instead
    of unpacked."""
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    # Strip any #fragment before using the URL as a fetch/cache key.
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        # Cache entry path: the URL, percent-quoted, inside the cache dir.
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file and os.path.exists(target_file)
            and os.path.exists(target_file + '.content-type')):
        # Cache hit: reuse the cached payload and its recorded content type.
        fp = open(target_file + '.content-type')
        content_type = fp.read().strip()
        fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty"
            # value from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            # No extension: guess one from the content type, then from the
            # final (possibly redirected) URL.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.md5_hash:
            _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        # Populate the cache with the freshly downloaded file.
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, only_download):
    """Download ``link`` over HTTP (or reuse the download cache) and unpack
    it into ``location``; with ``only_download`` the file is copied instead
    of unpacked.  This variant does not consult Content-Disposition."""
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    # Strip any #fragment before using the URL as a fetch/cache key.
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file and os.path.exists(target_file)
            and os.path.exists(target_file + '.content-type')):
        # Cache hit: reuse the cached payload and its recorded content type.
        fp = open(target_file + '.content-type')
        content_type = fp.read().strip()
        fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename
        ext = splitext(filename)[1]
        if not ext:
            # No extension: guess one from the content type, then from the
            # final (possibly redirected) URL.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.md5_hash:
            _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        # Populate the cache with the freshly downloaded file.
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def _extract_dependencies(self, file_name): archive = None names = None dependencies = [] if file_name.find('.tar.gz') > 0 or file_name.find('.tar.bz2') > 0: archive = tarfile.TarFile.open( os.path.join(self.cache_directory, file_name)) elif file_name.find('.zip') > 0: archive = zipfile.ZipFile( os.path.join(self.cache_directory, file_name)) if type(archive) is zipfile.ZipFile: names = archive.namelist() elif type(archive) is tarfile.TarFile: names = archive.getnames() if names is not None: package_requires = None package_name = str(splitext(file_name)[0]).strip() if os.path.join(package_name, "requirements.txt") in names: archive.extract(os.path.join(package_name, "requirements.txt"), "tmp") package_requires = os.path.join("tmp", package_name, "requirements.txt") elif os.path.join(package_name, "tools/pip-requires") in names: archive.extract( os.path.join(package_name, "tools/pip-requires"), "tmp") package_requires = os.path.join("tmp", package_name, "tools/pip-requires") if package_requires is not None: for req in open(package_requires): if len(req.strip()) is 0: continue dependencies.append(req.strip()) if os.path.isdir(os.path.join("tmp")): shutil.rmtree(os.path.join("tmp")) if archive is not None: archive.close() return dependencies
def _link_hook(self, overrides, spec, link):
    """Apply a per-package "src" override to a found link, if configured.

    Returns ``(link, pinned_version)``; ``pinned_version`` is None when
    no override applies.
    """
    overrides = overrides or {}
    src_template = overrides.get("src")
    if not src_template:
        return link, None
    logger.info(
        '===> Link override %s found for package %s', overrides, spec)
    # Pin the spec to the version encoded in the candidate's file name.
    _, version = splitext(link.filename)[0].rsplit('-', 1)
    spec = Spec.from_pinned(name=spec.name, version=version)
    rendered_src = env.from_string(src_template).render({"spec": spec})
    link = Link(rendered_src)
    # Hack to make pickle work
    link.comes_from = None
    return link, spec.pinned
def _extract_dependencies(self, file_name): archive = None names = None dependencies = [] if file_name.find('.tar.gz') > 0 or file_name.find('.tar.bz2') > 0: archive = tarfile.TarFile.open(os.path.join(self.cache_directory, file_name)) elif file_name.find('.zip') > 0: archive = zipfile.ZipFile(os.path.join(self.cache_directory, file_name)) if type(archive) is zipfile.ZipFile: names = archive.namelist() elif type(archive) is tarfile.TarFile: names = archive.getnames() if names is not None: package_requires = None package_name = str(splitext(file_name)[0]).strip() if os.path.join(package_name, "requirements.txt") in names: archive.extract(os.path.join(package_name, "requirements.txt"), "tmp") package_requires = os.path.join("tmp", package_name, "requirements.txt") elif os.path.join(package_name, "tools/pip-requires") in names: archive.extract(os.path.join(package_name, "tools/pip-requires"), "tmp") package_requires = os.path.join("tmp", package_name, "tools/pip-requires") if package_requires is not None: install_reqs = parse_requirements(package_requires) dependencies = [str(ir.req) for ir in install_reqs] if os.path.isdir(os.path.join("tmp")): shutil.rmtree(os.path.join("tmp")) if archive is not None: archive.close() return dependencies
def _extract_dependencies(self, file_name): archive = None names = None dependencies = [] if file_name.find(".tar.gz") > 0 or file_name.find(".tar.bz2") > 0: archive = tarfile.TarFile.open(os.path.join(self.cache_directory, file_name)) elif file_name.find(".zip") > 0: archive = zipfile.ZipFile(os.path.join(self.cache_directory, file_name)) if type(archive) is zipfile.ZipFile: names = archive.namelist() elif type(archive) is tarfile.TarFile: names = archive.getnames() if names is not None: package_requires = None package_name = str(splitext(file_name)[0]).strip() if os.path.join(package_name, "requirements.txt") in names: archive.extract(os.path.join(package_name, "requirements.txt"), "tmp") package_requires = os.path.join("tmp", package_name, "requirements.txt") elif os.path.join(package_name, "tools/pip-requires") in names: archive.extract(os.path.join(package_name, "tools/pip-requires"), "tmp") package_requires = os.path.join("tmp", package_name, "tools/pip-requires") if package_requires is not None: for req in open(package_requires): if len(req.strip()) is 0: continue dependencies.append(req.strip()) if os.path.isdir(os.path.join("tmp")): shutil.rmtree(os.path.join("tmp")) if archive is not None: archive.close() return dependencies
def splitext(self):
    """Split the basename of this link's path into (stem, extension)."""
    # Trailing slashes are stripped so a directory-style path still
    # yields its last component.
    base_name = posixpath.basename(self.path.rstrip('/'))
    return splitext(base_name)
def unpack_http_url(link, location, download_cache, download_dir=None, session=None):
    """Fetch ``link`` over HTTP -- reusing an existing download or cache
    entry when its hash checks out -- and unpack it into ``location``."""
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    temp_location = None
    # Strip any #fragment before using the URL as a fetch/cache key.
    target_url = link.url.split("#", 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ""))
        cache_content_type_file = cache_file + ".content-type"
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify("File was already downloaded %s" % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Previously-downloaded file %s has bad hash, "
                            "re-downloading." % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify("Using download cache from %s" % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Cached file %s has bad hash, "
                            "re-downloading." % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get("content-type", "")
        filename = link.filename  # fallback

        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get("content-disposition")
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty"
            # value from the filename param.
            filename = params.get("filename") or filename

        ext = splitext(filename)[1]
        if not ext:
            # No extension: guess one from the content type, then from
            # the final (possibly redirected) URL.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only
    # downloading archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only remove the temp file when it is not a pre-existing copy.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None):
    """Fetch ``link`` over HTTP -- reusing an existing download or cache
    entry when its hash checks out -- and unpack it into ``location``."""
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # Strip any #fragment before using the URL as a fetch/cache key.
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good
    # downloaded copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info().get('content-type', '')
        filename = link.filename  # fallback

        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty"
            # value from the filename param.
            filename = params.get('filename') or filename

        ext = splitext(filename)[1]
        if not ext:
            # No extension: guess one from the content type, then from
            # the final (possibly redirected) URL.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # A download dir is specified; copy the archive there.
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    unpack_file(temp_location, location, content_type, link)

    # If using a download cache, populate it when needed.
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only remove the temp file when it is not a pre-existing copy.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None, session=None):
    """Fetch ``link`` over HTTP via ``session`` -- reusing an existing
    download or cache entry when its hash checks out -- and unpack it into
    ``location``."""
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # Strip any #fragment before using the URL as a fetch/cache key.
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(
            download_cache,
            urllib.quote(target_url, '')
        )
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file) and
            os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a compressible
                #   file and will leave the file alone and with an empty
                #   Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it again
                #   and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding We're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback

        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty"
            # value from the filename param.
            filename = params.get('filename') or filename

        ext = splitext(filename)[1]
        if not ext:
            # No extension: guess one from the content type, then from
            # the final (possibly redirected) URL.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only
    # downloading archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only remove the temp file when it is not a pre-existing copy.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None):
    """Fetch ``link`` over HTTP -- preferring a cache entry, then an
    existing download -- and unpack it into ``location``."""
    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    # Strip any #fragment before using the URL as a fetch/cache key.
    target_url = link.url.split("#", 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ""))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if (target_file and os.path.exists(target_file)
            and os.path.exists(target_file + ".content-type")):
        # Cache hit: reuse the cached payload and its recorded content type.
        fp = open(target_file + ".content-type")
        content_type = fp.read().strip()
        fp.close()
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(target_file, link)
        temp_location = target_file
        logger.notify("Using download cache from %s" % target_file)
    elif already_downloaded:
        temp_location = already_downloaded
        # NOTE(review): guess_type() returns a (type, encoding) tuple here,
        # while the sister variants take ``[0]`` -- confirm downstream
        # consumers of ``content_type`` tolerate the tuple.
        content_type = mimetypes.guess_type(already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
        logger.notify("File was already downloaded %s" % already_downloaded)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()["content-type"]
        filename = link.filename  # fallback

        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get("content-disposition")
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty"
            # value from the filename param.
            filename = params.get("filename") or filename

        ext = splitext(filename)[1]
        if not ext:
            # No extension: guess one from the content type, then from
            # the final (possibly redirected) URL.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # A download dir is specified; copy the archive there.
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    unpack_file(temp_location, location, content_type, link)

    # Populate the cache with the freshly downloaded file, if needed.
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)

    # Only remove the temp file when it is not a pre-existing copy.
    if target_file is None and not already_downloaded:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_dir=None, session=None):
    """Fetch ``link`` over HTTP via the (required) ``session`` -- reusing an
    existing download when its hash checks out -- and unpack it into
    ``location``.  This variant has no download cache."""
    if session is None:
        raise TypeError(
            "unpack_http_url() missing 1 required keyword argument: 'session'")

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # Strip any #fragment before using the URL for fetching.
    target_url = link.url.split('#', 1)[0]
    download_hash = None

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a compressible
                #   file and will leave the file alone and with an empty
                #   Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it again
                #   and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding We're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback

        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty"
            # value from the filename param.
            filename = params.get('filename') or filename

        ext = splitext(filename)[1]
        if not ext:
            # No extension: guess one from the content type, then from
            # the final (possibly redirected) URL.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only
    # downloading archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # Only remove the temp file when it is not a pre-existing download.
    if not already_downloaded:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    """Download the archive behind ``link`` over HTTP and unpack it into
    ``location``, consulting ``download_cache`` and ``download_dir`` first.

    Lookup order: (1) an existing file in ``download_dir``, (2) a cached copy
    in ``download_cache`` (keyed by the URL-quoted target URL, with a sibling
    ``.content-type`` file), (3) a fresh download into a temporary directory.
    Hash-verified copies that fail verification are deleted and re-fetched.
    A fresh download is copied into ``download_dir`` and/or the cache as
    appropriate; purely temporary files are removed before returning.

    :param link: a ``Link`` object describing the remote archive.
    :param location: build directory the archive is unpacked into.
    :param download_cache: optional directory used as a URL-keyed cache.
    :param download_dir: optional directory used as a persistent download
        location.
    :param session: HTTP session to use; a new ``PipSession`` when ``None``.
    :raises HashMismatch: if a freshly downloaded file fails hash checking.
    """
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(
            download_cache,
            urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        # Both the payload and its recorded content type must be present.
        already_cached = (
            os.path.exists(cache_file) and
            os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            # NOTE: renamed from ``type`` to avoid shadowing the builtin.
            disp_type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            # No extension in the name: try to derive one from the declared
            # content type, then from the final (post-redirect) URL.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only delete the temp copy; cached / pre-downloaded files must be kept.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    """Download the archive behind ``link`` over HTTP and unpack it into
    ``location``, reusing ``download_dir`` or ``download_cache`` copies.

    Resolution order for the archive bytes:

    1. a file named after the link in ``download_dir`` (hash-checked when the
       link carries a hash);
    2. a cached copy in ``download_cache``, keyed by the URL-quoted target
       URL with a sibling ``.content-type`` file (also hash-checked);
    3. a fresh download into a temporary directory.

    Fresh downloads are copied into ``download_dir`` and/or the cache as
    applicable; purely temporary files are deleted before returning.

    :param link: a ``Link`` object describing the remote archive.
    :param location: build directory the archive is unpacked into.
    :param download_cache: optional directory used as a URL-keyed cache.
    :param download_dir: optional persistent download directory.
    :param session: HTTP session to use; a new ``PipSession`` when ``None``.
    :raises HashMismatch: if a freshly downloaded file fails hash checking.
    """
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(
            download_cache,
            urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        # Both the payload and its recorded content type must be present.
        already_cached = (
            os.path.exists(cache_file) and
            os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded
    # copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            # NOTE: renamed from ``type`` to avoid shadowing the builtin.
            disp_type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            # No extension in the name: try to derive one from the declared
            # content type, then from the final (post-redirect) URL.
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # A download dir is specified; copy the archive there.
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # Unpack into the build dir; even download-only runs need the archive
    # unpacked so dependencies can be parsed.
    unpack_file(temp_location, location, content_type, link)

    # If using a download cache, cache the fresh download.
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only delete the temp copy; cached / pre-downloaded files must be kept.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)