Exemple #1
0
def unpack_file_url(link, location):
    """Place the local file or directory behind ``link`` at ``location``.

    A directory is copied as a whole tree; anything else is passed to
    ``unpack_file`` together with a MIME type guessed from its path.
    """
    local_path = url_to_path(link.url)
    guessed_type, _encoding = mimetypes.guess_type(local_path)

    if not os.path.isdir(local_path):
        unpack_file(local_path, location, guessed_type, link)
        return

    # The tree copy recreates the destination, so an existing directory
    # has to be removed first.
    if os.path.isdir(location):
        rmtree(location)
    copytree(local_path, location)
Exemple #2
0
def test_wheel_version(tmpdir, data):
    """Check ``wheel.wheel_version`` on a future-versioned and a broken wheel."""
    expected_version = (1, 9)
    future_dir = tmpdir + "future"
    broken_dir = tmpdir + "broken"

    future_archive = data.packages.join("futurewheel-1.9-py2.py3-none-any.whl")
    unpack_file(future_archive, future_dir, None, None)
    broken_archive = data.packages.join("brokenwheel-1.0-py2.py3-none-any.whl")
    unpack_file(broken_archive, broken_dir, None, None)

    # The future wheel reports its version; the broken one yields a falsy value.
    assert wheel.wheel_version(future_dir) == expected_version
    assert not wheel.wheel_version(broken_dir)
Exemple #3
0
def test_wheel_version(tmpdir, data):
    """Unpack two sample wheels and compare what ``wheel_version`` reports."""
    samples = (
        ('future', 'futurewheel-1.9-py2.py3-none-any.whl'),
        ('broken', 'brokenwheel-1.0-py2.py3-none-any.whl'),
    )

    # Unpack each archive into its own location derived from tmpdir.
    unpacked = {}
    for label, archive in samples:
        unpacked[label] = tmpdir + label
        unpack_file(data.packages.join(archive), unpacked[label], None, None)

    assert wheel.wheel_version(unpacked['future']) == (1, 9)
    assert not wheel.wheel_version(unpacked['broken'])
Exemple #4
0
def unpack_http_url(link, location, download_cache, only_download):
    """Fetch ``link`` over HTTP, or reuse its cached copy, and deliver it to ``location``.

    link: link object; ``url``, ``filename`` and ``md5_hash`` are read.
    location: destination handed to ``unpack_file`` or ``_copy_file``.
    download_cache: cache directory, or a falsy value to bypass the cache.
    only_download: when true the archive is copied to ``location`` rather
        than unpacked.

    When ``link.md5_hash`` is set, the archive's hash is passed to
    ``_check_md5`` before anything is written to ``location``.
    """
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        # Cache entries are keyed by the fully quoted, fragment-less URL.
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
        and os.path.exists(target_file)
        and os.path.exists(target_file + '.content-type')):
        # Cache hit: the content type lives in a sidecar file.
        with open(target_file + '.content-type') as fp:
            content_type = fp.read().strip()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        # A response without a Content-Type header must not abort the
        # download with a KeyError.
        content_type = resp.info().get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            # The header is server-controlled: keep only the last path
            # component so the download cannot be written outside temp_dir.
            # An empty or missing value leaves the link-derived fallback.
            candidate = os.path.basename(params.get('filename') or '')
            if candidate not in ('', '.', '..'):
                filename = candidate
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            # Still no extension: try the URL we were redirected to.
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
    if link.md5_hash:
        _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        # Fresh download with caching enabled: store it for next time.
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
def test_wheel_version(tmpdir, data):
    """Verify ``wheel.wheel_version`` for one readable and one broken wheel."""

    def unpacked(archive_name, suffix):
        # Unpack the named sample archive and return where it landed.
        target = tmpdir + suffix
        unpack_file(data.packages.join(archive_name), target, None, None)
        return target

    future_root = unpacked('futurewheel-1.9-py2.py3-none-any.whl', 'future')
    broken_root = unpacked('brokenwheel-1.0-py2.py3-none-any.whl', 'broken')

    assert wheel.wheel_version(future_root) == (1, 9)
    assert not wheel.wheel_version(broken_root)
def unpack_http_url(link, location, download_cache, only_download):
    """Fetch ``link`` over HTTP, or reuse its cached copy, and deliver it to ``location``.

    link: link object; ``url``, ``filename`` and ``md5_hash`` are read.
    location: destination handed to ``unpack_file`` or ``_copy_file``.
    download_cache: cache directory, or a falsy value to bypass the cache.
    only_download: when true the archive is copied to ``location`` rather
        than unpacked.

    When ``link.md5_hash`` is set, the archive's hash is passed to
    ``_check_md5`` before anything is written to ``location``.
    """
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        # Cache entries are keyed by the fully quoted, fragment-less URL.
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
        and os.path.exists(target_file)
        and os.path.exists(target_file + '.content-type')):
        # Cache hit: the content type lives in a sidecar file.
        with open(target_file + '.content-type') as fp:
            content_type = fp.read().strip()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        # A response without a Content-Type header must not abort the
        # download with a KeyError.
        content_type = resp.info().get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            # The header is server-controlled: keep only the last path
            # component so the download cannot be written outside temp_dir.
            # An empty or missing value leaves the link-derived fallback.
            candidate = os.path.basename(params.get('filename') or '')
            if candidate not in ('', '.', '..'):
                filename = candidate
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            # Still no extension: try the URL we were redirected to.
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
    if link.md5_hash:
        _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        # Fresh download with caching enabled: store it for next time.
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
Exemple #7
0
def unpack_file_url(link, location, download_dir=None):
    """Unpack the local file or directory behind ``link`` into ``location``.

    link: link object; ``url_without_fragment``, ``hash`` and ``filename``
        are read.
    location: directory to populate.
    download_dir: optional directory. A copy already stored there under
        ``link.filename`` is reused when it is valid; otherwise the source
        file is copied into it after unpacking.

    When ``link.hash`` is set the source file is checked with
    ``_check_hash`` and any exception it raises propagates to the caller.
    """
    link_path = url_to_path(link.url_without_fragment)
    already_downloaded = False

    # If it's a url to a local directory
    if os.path.isdir(link_path):
        if os.path.isdir(location):
            rmtree(location)
        shutil.copytree(link_path, location, symlinks=True)
        return

    # if link has a hash, let's confirm it matches
    if link.hash:
        link_path_hash = _get_hash_from_file(link_path, link)
        _check_hash(link_path_hash, link)

    # If a download dir is specified, is the file already there and valid?
    if download_dir:
        download_path = os.path.join(download_dir, link.filename)
        if os.path.exists(download_path):
            logger.notify('File was already downloaded %s' % download_path)
            if link.hash:
                download_hash = _get_hash_from_file(download_path, link)
                try:
                    _check_hash(download_hash, link)
                    already_downloaded = True
                except HashMismatch:
                    # Name the stale copy in download_dir: link_path itself
                    # already passed the hash check above.
                    logger.warn(
                        'Previously-downloaded file %s has bad hash, '
                        're-downloading.' % download_path
                    )
                    os.unlink(download_path)
            else:
                already_downloaded = True

    if already_downloaded:
        from_path = download_path
    else:
        from_path = link_path

    content_type = mimetypes.guess_type(from_path)[0]

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(from_path, location, content_type, link)

    # a download dir is specified and not already downloaded
    if download_dir and not already_downloaded:
        _copy_file(from_path, download_dir, content_type, link)
Exemple #8
0
def unpack_file_url(link, location, download_dir=None):
    """Unpack the local file or directory behind ``link`` into ``location``.

    link: link object; ``url_without_fragment``, ``hash`` and ``filename``
        are read.
    location: directory to populate.
    download_dir: optional directory. A copy already stored there under
        ``link.filename`` is reused when it is valid; otherwise the source
        file is copied into it after unpacking.

    When ``link.hash`` is set the source file is checked with
    ``_check_hash`` and any exception it raises propagates to the caller.
    """
    link_path = url_to_path(link.url_without_fragment)
    already_downloaded = False

    # If it's a url to a local directory
    if os.path.isdir(link_path):
        if os.path.isdir(location):
            rmtree(location)
        shutil.copytree(link_path, location, symlinks=True)
        return

    # if link has a hash, let's confirm it matches
    if link.hash:
        link_path_hash = _get_hash_from_file(link_path, link)
        _check_hash(link_path_hash, link)

    # If a download dir is specified, is the file already there and valid?
    if download_dir:
        download_path = os.path.join(download_dir, link.filename)
        if os.path.exists(download_path):
            logger.notify('File was already downloaded %s' % download_path)
            if link.hash:
                download_hash = _get_hash_from_file(download_path, link)
                try:
                    _check_hash(download_hash, link)
                    already_downloaded = True
                except HashMismatch:
                    # Name the stale copy in download_dir: link_path itself
                    # already passed the hash check above.
                    logger.warn(
                        'Previously-downloaded file %s has bad hash, '
                        're-downloading.' % download_path
                    )
                    os.unlink(download_path)
            else:
                already_downloaded = True

    if already_downloaded:
        from_path = download_path
    else:
        from_path = link_path

    content_type = mimetypes.guess_type(from_path)[0]

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(from_path, location, content_type, link)

    # a download dir is specified and not already downloaded
    if download_dir and not already_downloaded:
        _copy_file(from_path, download_dir, content_type, link)
Exemple #9
0
 def prep(self, data, tmpdir):
     """Unpack the sample wheel under ``tmpdir`` and record the related paths on ``self``."""
     dist_info = "sample-1.2.0.dist-info"
     src_dir = os.path.join(tmpdir, "src")
     dest_dir = os.path.join(tmpdir, "dest")
     wheel_file = data.packages.join("sample-1.2.0-py2.py3-none-any.whl")

     self.name = "sample"
     self.wheelpath = wheel_file
     self.req = pkg_resources.Requirement.parse("sample")
     self.src = src_dir
     self.dest = dest_dir
     unpack_file(wheel_file, src_dir, None, None)

     # Install scheme: every category maps to a directory below dest.
     purelib_dir = os.path.join(dest_dir, "lib")
     self.scheme = dict(
         scripts=os.path.join(dest_dir, "bin"),
         purelib=purelib_dir,
         data=os.path.join(dest_dir, "data"),
     )
     self.src_dist_info = os.path.join(src_dir, dist_info)
     self.dest_dist_info = os.path.join(purelib_dir, dist_info)
    def test_unpack_wheel_no_flatten(self):
        """Unpacking the sample wheel leaves ``meta-1.0.dist-info`` directly in the target.

        The test is skipped when the wheel is missing at the relative path.
        """
        from pip import util
        from tempfile import mkdtemp
        from shutil import rmtree

        filepath = '../data/packages/meta-1.0-py2.py3-none-any.whl'
        if not os.path.exists(filepath):
            pytest.skip("%s does not exist" % filepath)
        # Create the directory before entering ``try``: if mkdtemp() itself
        # failed inside it, the cleanup would raise UnboundLocalError and
        # hide the real error.
        tmpdir = mkdtemp()
        try:
            util.unpack_file(filepath, tmpdir, 'application/zip', None)
            assert os.path.isdir(os.path.join(tmpdir, 'meta-1.0.dist-info'))
        finally:
            rmtree(tmpdir)
 def prep(self, data, tmpdir):
     """Unpack the sample wheel into ``tmpdir``/src and store the derived paths on ``self``."""
     dist_info_dir = 'sample-1.2.0.dist-info'

     self.name = 'sample'
     self.wheelpath = data.packages.join('sample-1.2.0-py2.py3-none-any.whl')
     self.req = pkg_resources.Requirement.parse('sample')

     source_root = os.path.join(tmpdir, 'src')
     install_root = os.path.join(tmpdir, 'dest')
     self.src = source_root
     self.dest = install_root
     unpack_file(self.wheelpath, source_root, None, None)

     # One sub-directory of the install root per scheme category.
     scheme = {}
     for category, subdir in (('scripts', 'bin'),
                              ('purelib', 'lib'),
                              ('data', 'data')):
         scheme[category] = os.path.join(install_root, subdir)
     self.scheme = scheme

     self.src_dist_info = os.path.join(source_root, dist_info_dir)
     self.dest_dist_info = os.path.join(scheme['purelib'], dist_info_dir)
 def prep(self, data, tmpdir):
     """Set up the sample wheel fixture: unpack it and remember source/destination paths."""

     def under(root, leaf):
         # Shorthand for joining a single component onto a root path.
         return os.path.join(root, leaf)

     self.name = 'sample'
     self.wheelpath = data.packages.join('sample-1.2.0-py2.py3-none-any.whl')
     self.req = pkg_resources.Requirement.parse('sample')
     self.src = under(tmpdir, 'src')
     self.dest = under(tmpdir, 'dest')
     unpack_file(self.wheelpath, self.src, None, None)

     lib_dir = under(self.dest, 'lib')
     self.scheme = {
         'scripts': under(self.dest, 'bin'),
         'purelib': lib_dir,
         'data': under(self.dest, 'data'),
     }
     self.src_dist_info = under(self.src, 'sample-1.2.0.dist-info')
     self.dest_dist_info = under(lib_dir, 'sample-1.2.0.dist-info')
Exemple #13
0
def unpack_http_url(link, location, download_cache, only_download):
    """Fetch ``link`` over HTTP, or reuse its cached copy, and deliver it to ``location``.

    link: link object; ``url``, ``filename`` and ``md5_hash`` are read.
    location: destination handed to ``unpack_file`` or ``_copy_file``.
    download_cache: cache directory, or a falsy value to bypass the cache.
    only_download: when true the archive is copied to ``location`` rather
        than unpacked.

    When ``link.md5_hash`` is set, the archive's hash is passed to
    ``_check_md5`` before anything is written to ``location``.
    """
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    target_url = link.url.split('#', 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        # Cache entries are keyed by the fully quoted, fragment-less URL.
        target_file = os.path.join(download_cache,
                                   urllib.quote(target_url, ''))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)
    if (target_file
        and os.path.exists(target_file)
        and os.path.exists(target_file+'.content-type')):
        # Cache hit: the content type lives in a sidecar file.
        with open(target_file+'.content-type') as fp:
            content_type = fp.read().strip()
        if link.md5_hash:
            download_hash = _get_md5_from_file(target_file, link)
        temp_location = target_file
        logger.notify('Using download cache from %s' % target_file)
    else:
        resp = _get_response_from_url(target_url, link)
        # A response without a Content-Type header must not abort the
        # download with a KeyError.
        content_type = resp.info().get('content-type', '')
        filename = link.filename
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            # Still no extension: try the URL we were redirected to.
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
    if link.md5_hash:
        _check_md5(download_hash, link)
    if only_download:
        _copy_file(temp_location, location, content_type, link)
    else:
        unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        # Fresh download with caching enabled: store it for next time.
        cache_download(target_file, temp_location, content_type)
    if target_file is None:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
Exemple #14
0
def get_archive_details(filename):
    """Return the details of the archive at ``filename``, memoised in ``EGGS_DETAILED``.

    Raises ``IOError`` when ``filename`` is not an existing file.
    """
    if not sh.isfile(filename):
        raise IOError("Can not detail non-existent file %s" % (filename))

    # Results are memoised per (base name, size) pair.
    memo_key = "f:%s:%s" % (sh.basename(filename), sh.getsize(filename))
    if cache_hit(memo_key):
        return EGGS_DETAILED[memo_key]

    # Unpack a private copy with pip and inspect the extracted tree.
    with utils.tempdir() as scratch:
        staged_copy = sh.copy(filename,
                              sh.joinpths(scratch, sh.basename(filename)))
        build_dir = sh.mkdir(sh.joinpths(scratch, 'build'))
        pip_util.unpack_file(staged_copy, build_dir, content_type='', link='')
        found = get_directory_details(build_dir)

    EGGS_DETAILED[memo_key] = found
    return found


def cache_hit(memo_key):
    """Tell whether ``memo_key`` already has an entry in ``EGGS_DETAILED``."""
    return memo_key in EGGS_DETAILED
Exemple #15
0
def get_archive_details(filename):
    """Look up, or compute and remember, the details of the archive ``filename``.

    Raises ``IOError`` when ``filename`` is not an existing file.
    """
    if not sh.isfile(filename):
        raise IOError("Can not detail non-existent file %s" % (filename))

    # The memo key combines the file's base name and its size.
    key_parts = (sh.basename(filename), sh.getsize(filename))
    lookup_key = "f:%s:%s" % key_parts

    if lookup_key not in EGGS_DETAILED:
        # Let pip unpack a temporary copy, then read the details from it.
        with utils.tempdir() as workdir:
            copy_target = sh.joinpths(workdir, sh.basename(filename))
            archive_copy = sh.copy(filename, copy_target)
            unpack_root = sh.mkdir(sh.joinpths(workdir, 'build'))
            pip_util.unpack_file(archive_copy, unpack_root,
                                 content_type='', link='')
            info = get_directory_details(unpack_root)
        EGGS_DETAILED[lookup_key] = info
        return info

    return EGGS_DETAILED[lookup_key]
Exemple #16
0
def unpack_http_url(link, location, download_cache, download_dir=None):
    """Obtain the archive for ``link`` and unpack it into ``location``.

    The archive is taken from the download cache, else from a copy already
    present in ``download_dir``, else downloaded from ``link.url``.

    link: link object; ``url``, ``filename``, ``hash`` and ``hash_name``
        are read.
    location: directory passed to ``unpack_file``.
    download_cache: cache directory, or a falsy value to bypass the cache.
    download_dir: optional directory that receives a copy of the archive
        unless a file named ``link.filename`` already exists there.
    """
    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    target_url = link.url.split("#", 1)[0]
    target_file = None
    download_hash = None
    if download_cache:
        # Cache entries are keyed by the fully quoted, fragment-less URL.
        target_file = os.path.join(download_cache, urllib.quote(target_url, ""))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if target_file and os.path.exists(target_file) and os.path.exists(target_file + ".content-type"):
        # Cache hit: the content type lives in a sidecar file.
        with open(target_file + ".content-type") as fp:
            content_type = fp.read().strip()
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(target_file, link)
        temp_location = target_file
        logger.notify("Using download cache from %s" % target_file)
    elif already_downloaded:
        temp_location = already_downloaded
        # guess_type() returns a (type, encoding) pair; only the type is a
        # content type.
        content_type = mimetypes.guess_type(already_downloaded)[0]
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
        logger.notify("File was already downloaded %s" % already_downloaded)
    else:
        resp = _get_response_from_url(target_url, link)
        # A response without a Content-Type header must not abort the
        # download with a KeyError.
        content_type = resp.info().get("content-type", "")
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get("content-disposition")
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            # The header is server-controlled: keep only the last path
            # component so the download cannot be written outside temp_dir.
            # An empty or missing value leaves the link-derived fallback.
            candidate = os.path.basename(params.get("filename") or "")
            if candidate not in ("", ".", ".."):
                filename = candidate
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            # Still no extension: try the URL we were redirected to.
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
    if link.hash and link.hash_name:
        _check_hash(download_hash, link)
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)
    unpack_file(temp_location, location, content_type, link)
    if target_file and target_file != temp_location:
        # The archive did not come from the cache: store it for next time.
        cache_download(target_file, temp_location, content_type)
    if target_file is None and not already_downloaded:
        os.unlink(temp_location)
    os.rmdir(temp_dir)
Exemple #17
0
def unpack_http_url(link, location, download_cache, download_dir=None):
    """Obtain the archive for ``link`` and unpack it into ``location``.

    Sources are tried in order: a copy already in ``download_dir``, the
    download cache, then a fresh download. A stored copy whose hash check
    raises ``HashMismatch`` is deleted and the next source is tried.

    link: link object; ``url``, ``filename``, ``hash`` and ``hash_name``
        are read.
    location: directory passed to ``unpack_file``.
    download_cache: cache directory, or a falsy value to bypass the cache.
    download_dir: optional directory that receives a copy of the archive
        unless a valid one is already there.
    """
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None
    if download_cache:
        # Cache entries are keyed by the fully quoted, fragment-less URL; the
        # content type is kept in a sidecar file.
        cache_file = os.path.join(download_cache, urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file)
                          and os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        resp = _get_response_from_url(target_url, link)
        content_type = resp.info().get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.info().get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            # The header is server-controlled: keep only the last path
            # component so the download cannot be written outside temp_dir.
            # An empty or missing value leaves the link-derived fallback.
            candidate = os.path.basename(params.get('filename') or '')
            if candidate not in ('', '.', '..'):
                filename = candidate
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != geturl(resp):
            # Still no extension: try the URL we were redirected to.
            ext = os.path.splitext(geturl(resp))[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)
    unpack_file(temp_location, location, content_type, link)
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)
    # Only a fresh download lives in temp_dir and needs deleting.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
Exemple #18
0
def unpack_http_url(link, location, download_dir=None, session=None):
    """Obtain the archive for ``link`` and unpack it into ``location``.

    A copy already present in ``download_dir`` is reused when its hash check
    passes (or when the link carries no hash); otherwise the archive is
    downloaded through ``session``.

    link: link object; ``url``, ``filename``, ``hash`` and ``hash_name``
        are read.
    location: directory passed to ``unpack_file``.
    download_dir: optional directory that receives a copy of the archive
        unless a valid one is already there.
    session: object whose ``get()`` performs the request; required.

    Raises ``TypeError`` when ``session`` is None, and re-raises
    ``requests.HTTPError`` after logging it.
    """
    if session is None:
        raise TypeError(
            "unpack_http_url() missing 1 required keyword argument: 'session'")

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    target_url = link.url.split('#', 1)[0]

    download_hash = None

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a compressible
                #   file and will leave the file alone and with an empty
                #   Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it again
                #   and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding We're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            # The header is server-controlled: keep only the last path
            # component so the download cannot be written outside temp_dir.
            # An empty or missing value leaves the link-derived fallback.
            candidate = os.path.basename(params.get('filename') or '')
            if candidate not in ('', '.', '..'):
                filename = candidate
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            # Still no extension: try the URL we were redirected to.
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # Only a fresh download lives in temp_dir and needs deleting.
    if not already_downloaded:
        os.unlink(temp_location)

    os.rmdir(temp_dir)
Exemple #19
0
def unpack_http_url(link, location, download_cache, download_dir=None, session=None):
    """Obtain the archive for ``link`` and unpack it into ``location``.

    Sources are tried in order: a copy already in ``download_dir``, the
    download cache, then a fresh download through ``session``. A stored copy
    whose hash check raises ``HashMismatch`` is deleted and the next source
    is tried.

    link: link object; ``url``, ``filename``, ``hash`` and ``hash_name``
        are read.
    location: directory passed to ``unpack_file``.
    download_cache: cache directory, or a falsy value to bypass the cache.
    download_dir: optional directory that receives a copy of the archive
        unless a valid one is already there.
    session: object whose ``get()`` performs the request; a new
        ``PipSession`` is created when omitted.

    Re-raises ``requests.HTTPError`` after logging it.
    """
    if session is None:
        session = PipSession()

    temp_dir = tempfile.mkdtemp("-unpack", "pip-")
    temp_location = None
    target_url = link.url.split("#", 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache, urllib.quote(target_url, ""))
        cache_content_type_file = cache_file + ".content-type"
        already_cached = os.path.exists(cache_file) and os.path.exists(cache_content_type_file)
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify("File was already downloaded %s" % already_downloaded)
        if link.hash:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Previously-downloaded file %s has bad hash, " "re-downloading." % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify("Using download cache from %s" % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn("Cached file %s has bad hash, " "re-downloading." % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" % (exc.response.status_code, link))
            raise

        content_type = resp.headers.get("content-type", "")
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get("content-disposition")
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            # The header is server-controlled: keep only the last path
            # component so the download cannot be written outside temp_dir.
            # An empty or missing value leaves the link-derived fallback.
            candidate = os.path.basename(params.get("filename") or "")
            if candidate not in ("", ".", ".."):
                filename = candidate
        ext = splitext(filename)[1]
        if not ext:
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            # Still no extension: try the URL we were redirected to.
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only a fresh download lives in temp_dir and needs deleting.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    """Obtain the archive for ``link`` and unpack it into ``location``.

    Sources are tried in this order: a file already present in
    ``download_dir``, then an entry in ``download_cache``, then a fresh HTTP
    download into a temporary directory. An on-disk copy whose hash check
    raises ``HashMismatch`` is deleted and the next source is tried.

    The archive is then copied into ``download_dir`` (unless it was found
    there), unpacked with ``unpack_file``, and written to the download cache
    (unless a cached copy was already in use). A freshly downloaded
    temporary file and the temporary directory are removed at the end.

    :param link: link to fetch; its ``url``, ``filename``, ``hash`` and
        ``hash_name`` attributes are read here.
    :param location: unpack target handed to ``unpack_file``.
    :param download_cache: cache directory, or a falsy value for no cache.
    :param download_dir: optional directory that is checked for an existing
        copy and receives a copy of an archive obtained elsewhere.
    :param session: object whose ``get`` performs the request; a new
        ``PipSession`` is created when it is ``None``.
    :raises requests.HTTPError: logged and re-raised when
        ``raise_for_status`` fails.

    A failed hash check on a freshly downloaded file is not handled here
    and propagates from ``_check_hash``.
    """
    if session is None:
        session = PipSession()

    # NOTE: temp_dir is only removed on the success path at the bottom of
    # this function; if an exception propagates, the directory (and any
    # file downloaded into it) is left behind.
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # Drop the URL fragment before using the URL as a request/cache key.
    target_url = link.url.split('#', 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(
            download_cache,
            urllib.quote(target_url, '')
        )
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file) and
            os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        # Same guard as the cache and fresh-download checks below: only
        # verify when both the digest and the algorithm name are known.
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(
                target_url,
                # We use Accept-Encoding: identity here because requests
                # defaults to accepting compressed responses. This breaks in
                # a variety of ways depending on how the server is configured.
                # - Some servers will notice that the file isn't a compressible
                #   file and will leave the file alone and with an empty
                #   Content-Encoding
                # - Some servers will notice that the file is already
                #   compressed and will leave the file alone and will add a
                #   Content-Encoding: gzip header
                # - Some servers won't notice anything at all and will take
                #   a file that's already been compressed and compress it again
                #   and set the Content-Encoding: gzip header
                # By setting this to request only the identity encoding we're
                # hoping to eliminate the third case. Hopefully there does not
                # exist a server which when given a file will notice it is
                # already compressed and that you're not asking for a
                # compressed file and will then decompress it before sending
                # because if that's the case I don't think it'll ever be
                # possible to make this work.
                headers={"Accept-Encoding": "identity"},
                stream=True,
            )
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            # The header is controlled by the server, so keep only the last
            # path component: a value such as "../../x" must not be able to
            # place the download outside temp_dir. Empty, "." and ".."
            # results are ignored in favour of the fallback name.
            suggested = os.path.basename(params.get('filename') or '')
            if suggested not in ('', '.', '..'):
                filename = suggested
        ext = splitext(filename)[1]
        if not ext:
            # No extension in the name: try the Content-Type first ...
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            # ... then the final URL, when it differs from the link URL.
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only a freshly downloaded file lives in temp_dir; cached and
    # previously downloaded copies must be left where they are.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)
def unpack_http_url(link, location, download_cache, download_dir=None,
                    session=None):
    """Obtain the archive for ``link`` and unpack it into ``location``.

    Sources are tried in this order: a file already present in
    ``download_dir``, then an entry in ``download_cache``, then a fresh HTTP
    download into a temporary directory. An on-disk copy whose hash check
    raises ``HashMismatch`` is deleted and the next source is tried.

    The archive is then copied into ``download_dir`` (unless it was found
    there), unpacked with ``unpack_file``, and written to the download cache
    (unless a cached copy was already in use). A freshly downloaded
    temporary file and the temporary directory are removed at the end.

    :param link: link to fetch; its ``url``, ``filename``, ``hash`` and
        ``hash_name`` attributes are read here.
    :param location: unpack target handed to ``unpack_file``.
    :param download_cache: cache directory, or a falsy value for no cache.
    :param download_dir: optional directory that is checked for an existing
        copy and receives a copy of an archive obtained elsewhere.
    :param session: object whose ``get`` performs the request; a new
        ``PipSession`` is created when it is ``None``.
    :raises requests.HTTPError: logged and re-raised when
        ``raise_for_status`` fails.

    A failed hash check on a freshly downloaded file is not handled here
    and propagates from ``_check_hash``.
    """
    if session is None:
        session = PipSession()

    # NOTE: temp_dir is only removed on the success path at the bottom of
    # this function; if an exception propagates, the directory (and any
    # file downloaded into it) is left behind.
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # Drop the URL fragment before using the URL as a request/cache key.
    target_url = link.url.split('#', 1)[0]

    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache,
                                  urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (
            os.path.exists(cache_file) and
            os.path.exists(cache_content_type_file)
        )
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        # Same guard as the cache and fresh-download checks below: only
        # verify when both the digest and the algorithm name are known.
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Previously-downloaded file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # We have a cached file, and we haven't already found a good downloaded copy
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn(
                    'Cached file %s has bad hash, '
                    're-downloading.' % temp_location
                )
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            # The header is controlled by the server, so keep only the last
            # path component: a value such as "../../x" must not be able to
            # place the download outside temp_dir. Empty, "." and ".."
            # results are ignored in favour of the fallback name.
            suggested = os.path.basename(params.get('filename') or '')
            if suggested not in ('', '.', '..'):
                filename = suggested
        ext = splitext(filename)[1]
        if not ext:
            # No extension in the name: try the Content-Type first ...
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            # ... then the final URL, when it differs from the link URL.
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # A download dir is specified and did not already hold the file: copy
    # the archive there.
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # Unpack the archive into the requested location.
    unpack_file(temp_location, location, content_type, link)

    # Populate the download cache unless a cached copy is already in use.
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only a freshly downloaded file lives in temp_dir; cached and
    # previously downloaded copies must be left where they are.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)
    os.rmdir(temp_dir)
Exemple #22
0
def unpack_http_url(link,
                    location,
                    download_cache,
                    download_dir=None,
                    session=None):
    """Obtain the archive for ``link`` and unpack it into ``location``.

    Sources are tried in this order: a file already present in
    ``download_dir``, then an entry in ``download_cache``, then a fresh HTTP
    download into a temporary directory. An on-disk copy whose hash check
    raises ``HashMismatch`` is deleted and the next source is tried.

    The archive is then copied into ``download_dir`` (unless it was found
    there), unpacked with ``unpack_file``, and written to the download cache
    (unless a cached copy was already in use). A freshly downloaded
    temporary file and the temporary directory are removed at the end.

    :param link: link to fetch; its ``url``, ``filename``, ``hash`` and
        ``hash_name`` attributes are read here.
    :param location: unpack target handed to ``unpack_file``.
    :param download_cache: cache directory, or a falsy value for no cache.
    :param download_dir: optional directory that is checked for an existing
        copy and receives a copy of an archive obtained elsewhere.
    :param session: object whose ``get`` performs the request; a new
        ``PipSession`` is created when it is ``None``.
    :raises requests.HTTPError: logged and re-raised when
        ``raise_for_status`` fails.

    A failed hash check on a freshly downloaded file is not handled here
    and propagates from ``_check_hash``.
    """
    if session is None:
        session = PipSession()

    # NOTE: temp_dir is only removed on the success path at the bottom of
    # this function; if an exception propagates, the directory (and any
    # file downloaded into it) is left behind.
    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    temp_location = None
    # Drop the URL fragment before using the URL as a request/cache key.
    target_url = link.url.split('#', 1)[0]
    already_cached = False
    cache_file = None
    cache_content_type_file = None
    download_hash = None

    # If a download cache is specified, is the file cached there?
    if download_cache:
        cache_file = os.path.join(download_cache, urllib.quote(target_url, ''))
        cache_content_type_file = cache_file + '.content-type'
        already_cached = (os.path.exists(cache_file)
                          and os.path.exists(cache_content_type_file))
        if not os.path.isdir(download_cache):
            create_download_cache_folder(download_cache)

    # If a download dir is specified, is the file already downloaded there?
    already_downloaded = None
    if download_dir:
        already_downloaded = os.path.join(download_dir, link.filename)
        if not os.path.exists(already_downloaded):
            already_downloaded = None

    # If already downloaded, does its hash match?
    if already_downloaded:
        temp_location = already_downloaded
        content_type = mimetypes.guess_type(already_downloaded)[0]
        logger.notify('File was already downloaded %s' % already_downloaded)
        # Same guard as the cache and fresh-download checks below: only
        # verify when both the digest and the algorithm name are known.
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(temp_location, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Previously-downloaded file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(already_downloaded)
                already_downloaded = None

    # If not a valid download, let's confirm the cached file is valid
    if already_cached and not temp_location:
        with open(cache_content_type_file) as fp:
            content_type = fp.read().strip()
        temp_location = cache_file
        logger.notify('Using download cache from %s' % cache_file)
        if link.hash and link.hash_name:
            download_hash = _get_hash_from_file(cache_file, link)
            try:
                _check_hash(download_hash, link)
            except HashMismatch:
                logger.warn('Cached file %s has bad hash, '
                            're-downloading.' % temp_location)
                temp_location = None
                os.unlink(cache_file)
                os.unlink(cache_content_type_file)
                already_cached = False

    # We don't have either a cached or a downloaded copy
    # let's download to a tmp dir
    if not temp_location:
        try:
            resp = session.get(target_url, stream=True)
            resp.raise_for_status()
        except requests.HTTPError as exc:
            logger.fatal("HTTP error %s while getting %s" %
                         (exc.response.status_code, link))
            raise

        content_type = resp.headers.get('content-type', '')
        filename = link.filename  # fallback
        # Have a look at the Content-Disposition header for a better guess
        content_disposition = resp.headers.get('content-disposition')
        if content_disposition:
            _disposition, params = cgi.parse_header(content_disposition)
            # The header is controlled by the server, so keep only the last
            # path component: a value such as "../../x" must not be able to
            # place the download outside temp_dir. Empty, "." and ".."
            # results are ignored in favour of the fallback name.
            suggested = os.path.basename(params.get('filename') or '')
            if suggested not in ('', '.', '..'):
                filename = suggested
        ext = splitext(filename)[1]
        if not ext:
            # No extension in the name: try the Content-Type first ...
            ext = mimetypes.guess_extension(content_type)
            if ext:
                filename += ext
        if not ext and link.url != resp.url:
            # ... then the final URL, when it differs from the link URL.
            ext = os.path.splitext(resp.url)[1]
            if ext:
                filename += ext
        temp_location = os.path.join(temp_dir, filename)
        download_hash = _download_url(resp, link, temp_location)
        if link.hash and link.hash_name:
            _check_hash(download_hash, link)

    # a download dir is specified; let's copy the archive there
    if download_dir and not already_downloaded:
        _copy_file(temp_location, download_dir, content_type, link)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(temp_location, location, content_type, link)

    # if using a download cache, cache it, if needed
    if cache_file and not already_cached:
        cache_download(cache_file, temp_location, content_type)

    # Only a freshly downloaded file lives in temp_dir; cached and
    # previously downloaded copies must be left where they are.
    if not (already_cached or already_downloaded):
        os.unlink(temp_location)

    os.rmdir(temp_dir)