def is_archive_file(name): """Return True if `name` is a considered as an archive file.""" archives = (".zip", ".tar.gz", ".tar.bz2", ".tgz", ".tar", ".whl") ext = splitext(name)[1].lower() if ext in archives: return True return False
def _download_http_url(link, session, temp_dir, hashes): """Download link url into temp_dir using provided session""" target_url = link.url.split('#', 1)[0] try: resp = session.get( target_url, # We use Accept-Encoding: identity here because requests # defaults to accepting compressed responses. This breaks in # a variety of ways depending on how the server is configured. # - Some servers will notice that the file isn't a compressible # file and will leave the file alone and with an empty # Content-Encoding # - Some servers will notice that the file is already # compressed and will leave the file alone and will add a # Content-Encoding: gzip header # - Some servers won't notice anything at all and will take # a file that's already been compressed and compress it again # and set the Content-Encoding: gzip header # By setting this to request only the identity encoding We're # hoping to eliminate the third case. Hopefully there does not # exist a server which when given a file will notice it is # already compressed and that you're not asking for a # compressed file and will then decompress it before sending # because if that's the case I don't think it'll ever be # possible to make this work. headers={"Accept-Encoding": "identity"}, stream=True, ) resp.raise_for_status() except requests.HTTPError as exc: logger.critical( "HTTP error %s while getting %s", exc.response.status_code, link, ) raise content_type = resp.headers.get('content-type', '') filename = link.filename # fallback # Have a look at the Content-Disposition header for a better guess content_disposition = resp.headers.get('content-disposition') if content_disposition: type, params = cgi.parse_header(content_disposition) # We use ``or`` here because we don't want to use an "empty" value # from the filename param. filename = params.get('filename') or filename ext = splitext(filename)[1] if not ext: ext = mimetypes.guess_extension(content_type) if ext: filename += ext if not ext and link.url != resp.url: ext = os.path.splitext(resp.url)[1] if ext: filename += ext file_path = os.path.join(temp_dir, filename) with open(file_path, 'wb') as content_file: _download_url(resp, link, content_file, hashes) return file_path, content_type
def is_archive_file(name): """Return True if `name` is a considered as an archive file.""" archives = ( '.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.whl' ) ext = splitext(name)[1].lower() if ext in archives: return True return False
def is_archive_file(name): """Return True if `name` is a considered as an archive file.""" ext = splitext(name)[1].lower() if ext in ARCHIVE_EXTENSIONS: return True return False
def splitext(self): return splitext(posixpath.basename(self.path.rstrip('/')))
def path_to_url(path): """ Convert a path to a file: URL. The path will be made absolute and have quoted path parts. """ path = os.path.normpath(os.path.abspath(path)) url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path)) return url def is_archive_file(name): """Return True if `name` is a considered as an archive file.""" archives = ( '.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.whl' ) ext = splitext(name)[1].lower() if ext in archives: return True return False def unpack_vcs_link(link, location, only_download=False): vcs_backend = _get_used_vcs_backend(link) if only_download: vcs_backend.export(location) else: vcs_backend.unpack(location) def _get_used_vcs_backend(link): for backend in vcs.backends: