def unpack_http_url(link, location, download_cache, only_download):
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
            and os.path.exists(target_file)
            and os.path.exists(target_file + '.content-type')):
        fp = open(target_file + '.content-type')
        content_type = fp.read().strip()
        fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.md5_hash:
            _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
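# A hedged side note on the cache layout used above, inferred from the code
# rather than from pip documentation: each cached archive is stored under a
# flat filename derived from the URL, with a sibling "<name>.content-type"
# text file holding the server-reported Content-Type. A minimal sketch of
# that convention follows; the helper names are hypothetical, and
# urllib.quote is Python 2's top-level function, matching the code above.
import os
import urllib


def _cache_paths(download_cache, target_url):
    # quote with safe='' so that '/' is escaped too, mapping the whole URL
    # to a single flat filename inside the cache directory
    cache_file = os.path.join(download_cache, urllib.quote(target_url, ''))
    return cache_file, cache_file + '.content-type'


def _read_cached_content_type(cache_content_type_file):
    # the sidecar file holds only the Content-Type string, e.g. 'application/zip'
    with open(cache_content_type_file) as fp:
        return fp.read().strip()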
def unpack_http_url(link, location, download_cache, only_download):
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
            and os.path.exists(target_file)
            and os.path.exists(target_file + '.content-type')):
        fp = open(target_file + '.content-type')
        content_type = fp.read().strip()
        fp.close()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()['content-type']
        filename = link.filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.md5_hash:
            _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
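# The download branch above guesses a file extension when the link's
# filename lacks one. A small self-contained sketch of that fallback chain,
# with assumed example values (note that mimetypes.guess_extension can also
# return None for content types it does not know):
import mimetypes
import os

filename = 'foo'                         # no extension in the URL
content_type = 'application/zip'         # as reported by the server
ext = os.path.splitext(filename)[1]
if not ext:
    ext = mimetypes.guess_extension(content_type)  # '.zip' here
    if ext:
        filename += ext
print(filename)                          # foo.zip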
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file) and
            os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded
    # copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    unpack_file(temp_location, location, content_type, link)

    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    temp_location = None
    target_url = link.url.split("#", 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ""))
        cache_content_type_file = cache_file + ".content-type"
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify("File was already downloaded %s" % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Previously-downloaded file %s has bad hash, "
                            "re-downloading." % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify("Using download cache from %s" % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Cached file %s has bad hash, "
                            "re-downloading." % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get("content-type", "")
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get("content-disposition")
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get("filename") or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None):
    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    target_url = link.url.split("#", 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ""))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if (target_file
            and os.path.exists(target_file)
            and os.path.exists(target_file + ".content-type")):
        fp = open(target_file + ".content-type")
        content_type = fp.read().strip()
        fp.close()
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(target_file, link)
        temp_location = target_file
        logger.notify("Using download cache from %s" % target_file)
    elif already_downloaded:
        temp_location = already_downloaded
        # guess_type() returns a (type, encoding) tuple; we only want the
        # type string
        content_type = mimetypes.guess_type(already_downloaded)[0]
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
        logger.notify("File was already downloaded %s" % already_downloaded)
    else:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info()["content-type"]
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get("content-disposition")
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get("filename") or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    unpack_file(temp_location, location, content_type, link)

    if target_file and target_file != temp_location:
        cache_download(target_file, temp_location, content_type)

    if target_file is None and not already_downloaded:
        os.unlink(temp_location)

    os.rmdir(temp_dir)
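# The hash checks above rely on pip-internal helpers (_get_hash_from_file,
# _check_hash) that are not shown in this excerpt. Below is a hypothetical
# sketch of roughly what such a helper might do, assuming link.hash_name
# names a hashlib algorithm (e.g. 'md5' or 'sha256') and link.hash holds the
# expected hex digest from the URL fragment. This is an illustration, not
# pip's actual implementation.
import hashlib


def get_hash_from_file(path, link):
    # stream the file through the named digest in fixed-size chunks so large
    # archives don't have to fit in memory
    download_hash = hashlib.new(link.hash_name)
    with open(path, 'rb') as fp:
        for chunk in iter(lambda: fp.read(4096), b''):
            download_hash.update(chunk)
    return download_hash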
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(
            download_cache,
            urllib.quote(target_url, '')
        )
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file) and
            os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a
                #   compressible file and will leave the file alone and with
                #   an empty Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it
                #   again and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding we're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None):
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file) and
                          os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded
    # copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info().get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            type, params = cgi.parse_header(content_disposition)
            # We use ``or`` here because we don't want to use an "empty" value
            # from the filename param.
            filename = params.get('filename') or filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    unpack_file(temp_location, location, content_type, link)

    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
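# A hedged usage sketch for the download_dir-taking versions above. Names
# and paths are illustrative only; Link and the module-level helpers are
# assumed to come from pip's internals, so this is not runnable standalone.
link = Link('https://pypi.python.org/packages/source/f/foo/foo-1.0.tar.gz')
unpack_http_url(
    link,
    location='/tmp/pip-build/foo',          # where the archive is unpacked
    download_cache='/home/user/.pip/cache',  # enables the .content-type cache
    download_dir=None,                       # set to also keep the archive
)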