Example #1
def links(self):
    """Yields all links in the page"""
    for anchor in self.parsed.findall(".//a"):
        if anchor.get("href"):
            href = anchor.get("href")
            url = self.clean_link(urllib_parse.urljoin(self.base_url, href))
            yield Link(url, self)
def path_to_url(path):
    """
    Convert a path to a file: URL.  The path will be made absolute and have
    quoted path parts.
    """
    path = os.path.normpath(os.path.abspath(path))
    url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path))
    return url
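A quick sanity check of path_to_url, rewritten against the standard library as a sketch (urllib_parse and urllib_request above are pip's vendored six aliases for the same modules):

import os
import urllib.parse
import urllib.request

def path_to_url_demo(path):
    # Standard-library equivalent of the helper above (a sketch).
    path = os.path.normpath(os.path.abspath(path))
    return urllib.parse.urljoin('file:', urllib.request.pathname2url(path))

# On a POSIX system this prints something like: file:///tmp/my%20pkgs
print(path_to_url_demo('/tmp/my pkgs'))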
Example #3
def links(self):
    """Yields all links in the page"""
    for anchor in self.parsed.findall(".//a"):
        if anchor.get("href"):
            href = anchor.get("href")
            url = self.clean_link(
                urllib_parse.urljoin(self.base_url, href)
            )
            pyrequire = anchor.get('data-requires-python')
            pyrequire = unescape(pyrequire) if pyrequire else None
            yield Link(url, self, requires_python=pyrequire)
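The same anchor-walking pattern can be exercised standalone; a minimal sketch using xml.etree on well-formed markup instead of the page object's html5lib parse (base_url and the sample markup are made up for illustration):

from html import unescape
from urllib.parse import urljoin
from xml.etree import ElementTree

base_url = "https://example.com/simple/demo/"
markup = ('<html><body>'
          '<a href="demo-1.0.tar.gz" data-requires-python="&gt;=3.6">demo</a>'
          '</body></html>')
parsed = ElementTree.fromstring(markup)
for anchor in parsed.findall(".//a"):
    href = anchor.get("href")
    if href:
        url = urljoin(base_url, href)
        pyrequire = anchor.get("data-requires-python")
        pyrequire = unescape(pyrequire) if pyrequire else None
        print(url, pyrequire)
# https://example.com/simple/demo/demo-1.0.tar.gz >=3.6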
Example #4
def _get_html_page(link, session=None):
    # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
    if session is None:
        raise TypeError(
            "_get_html_page() missing 1 required keyword argument: 'session'"
        )

    url = link.url.split('#', 1)[0]

    # Check for VCS schemes that do not support lookup as web pages.
    vcs_scheme = _match_vcs_scheme(url)
    if vcs_scheme:
        logger.debug('Cannot look at %s URL %s', vcs_scheme, link)
        return None

    # Tack index.html onto file:// URLs that point to directories
    scheme, _, path, _, _, _ = urllib_parse.urlparse(url)
    if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))):
        # add trailing slash if not present so urljoin doesn't trim
        # final segment
        if not url.endswith('/'):
            url += '/'
        url = urllib_parse.urljoin(url, 'index.html')
        logger.debug(' file: URL is directory, getting %s', url)

    try:
        resp = _get_html_response(url, session=session)
    except _NotHTTP:
        logger.debug(
            'Skipping page %s because it looks like an archive, and cannot '
            'be checked by HEAD.', link,
        )
    except _NotHTML as exc:
        logger.debug(
            'Skipping page %s because the %s request got Content-Type: %s',
            link, exc.request_desc, exc.content_type,
        )
    except requests.HTTPError as exc:
        _handle_get_page_fail(link, exc)
    except RetryError as exc:
        _handle_get_page_fail(link, exc)
    except SSLError as exc:
        reason = "There was a problem confirming the ssl certificate: "
        reason += str(exc)
        _handle_get_page_fail(link, reason, meth=logger.info)
    except requests.ConnectionError as exc:
        _handle_get_page_fail(link, "connection error: %s" % exc)
    except requests.Timeout:
        _handle_get_page_fail(link, "timed out")
    else:
        return HTMLPage(resp.content, resp.url, resp.headers)
    return None
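_match_vcs_scheme is the helper this version factors out of the inline loop that Examples #10 and #11 below still carry. A plausible standalone sketch; the scheme list is an assumption standing in for pip's VcsSupport registry:

def _match_vcs_scheme(url):
    # Return the VCS scheme the URL starts with, or None (sketch).
    # VCS_SCHEMES is an assumed stand-in for pip's VcsSupport.schemes.
    VCS_SCHEMES = ['bzr', 'git', 'hg', 'svn']
    for scheme in VCS_SCHEMES:
        if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
            return scheme
    return None

print(_match_vcs_scheme('git+https://github.com/pypa/pip.git'))  # git
print(_match_vcs_scheme('https://pypi.org/simple/'))             # None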
Example #5
    def explicit_rel_links(self, rels=("homepage", "download")):
        """Yields all links with the given relations"""
        rels = set(rels)

        for anchor in self.parsed.findall(".//a"):
            if anchor.get("rel") and anchor.get("href"):
                found_rels = set(anchor.get("rel").split())
                # Determine the intersection between what rels were found and
                #   what rels were being looked for
                if found_rels & rels:
                    href = anchor.get("href")
                    url = self.clean_link(urllib_parse.urljoin(self.base_url, href))
                    yield Link(url, self, trusted=False)
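The rel filter is plain set intersection; for example, an anchor with rel="nofollow download" matches the default rels:

rels = set(("homepage", "download"))
found_rels = set("nofollow download".split())
print(bool(found_rels & rels))  # True: "download" is in both sets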
Example #6
File: index.py Project: jaraco/pip
def iter_links(self):
    """Yields all links in the page"""
    document = html5lib.parse(
        self.content,
        transport_encoding=_get_encoding_from_headers(self.headers),
        namespaceHTMLElements=False,
    )
    base_url = _determine_base_url(document, self.url)
    for anchor in document.findall(".//a"):
        if anchor.get("href"):
            href = anchor.get("href")
            url = _clean_link(urllib_parse.urljoin(base_url, href))
            pyrequire = anchor.get('data-requires-python')
            pyrequire = unescape(pyrequire) if pyrequire else None
            yield Link(url, self.url, requires_python=pyrequire)
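_determine_base_url prefers a <base href="..."> tag over the page URL when resolving relative links. A sketch consistent with how it is used above (the real helper lives alongside iter_links in pip's index module):

def _determine_base_url(document, page_url):
    # Use the first <base href="..."> if the page declares one (sketch).
    for base in document.findall(".//base"):
        href = base.get("href")
        if href:
            return href
    return page_url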
Example #7
    def links(self):
        """Yields all links in the page"""
        for anchor in self.parsed.findall(".//a"):
            if anchor.get("href"):
                href = anchor.get("href")
                url = self.clean_link(urllib_parse.urljoin(self.base_url, href))

                # Determine if this link is internal. If that distinction
                #   doesn't make sense in this context, then we don't make
                #   any distinction.
                internal = None
                if self.api_version and self.api_version >= 2:
                    # Only api_versions >= 2 have a distinction between
                    #   external and internal links
                    internal = bool(anchor.get("rel") and "internal" in anchor.get("rel").split())

                yield Link(url, self, internal=internal)
Example #8
def scraped_rel_links(self):
    # Can we get rid of this horrible horrible method?
    for regex in (self._homepage_re, self._download_re):
        match = regex.search(self.content)
        if not match:
            continue
        href_match = self._href_re.search(self.content, pos=match.end())
        if not href_match:
            continue
        url = href_match.group(1) or href_match.group(2) or href_match.group(3)
        if not url:
            continue
        try:
            url = url.decode("ascii")
        except UnicodeDecodeError:
            continue
        url = self.clean_link(urllib_parse.urljoin(self.base_url, url))
        yield Link(url, self, trusted=False, _deprecated_regex=True)
Example #9
def process_line(line,
                 filename,
                 line_number,
                 finder=None,
                 comes_from=None,
                 options=None,
                 session=None,
                 wheel_cache=None,
                 constraint=False):
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    :param options: OptionParser options that we may update
    """
    parser = build_parser(line)
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        # `finder.format_control` will be updated during parsing
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    if sys.version_info < (2, 7, 3):
        # Prior to 2.7.3, shlex cannot deal with unicode entries
        options_str = options_str.encode('utf8')
    opts, _ = parser.parse_args(shlex.split(options_str), defaults)

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r',
        filename,
        line_number,
    )

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        yield InstallRequirement.from_line(args_str,
                                           line_comes_from,
                                           constraint=constraint,
                                           isolated=isolated,
                                           options=req_options,
                                           wheel_cache=wheel_cache)

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        yield InstallRequirement.from_editable(opts.editables[0],
                                               comes_from=line_comes_from,
                                               constraint=constraint,
                                               isolated=isolated,
                                               wheel_cache=wheel_cache)

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        parser = parse_requirements(req_path,
                                    finder,
                                    comes_from,
                                    options,
                                    session,
                                    constraint=nested_constraint,
                                    wheel_cache=wheel_cache)
        for req in parser:
            yield req

    # percolate hash-checking option upward
    elif opts.require_hashes:
        options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        if opts.index_url:
            finder.index_urls = [opts.index_url]
        if opts.no_index is True:
            finder.index_urls = []
        if opts.extra_index_urls:
            finder.index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            finder.find_links.append(value)
        if opts.pre:
            finder.allow_all_prereleases = True
        if opts.process_dependency_links:
            finder.process_dependency_links = True
        if opts.trusted_hosts:
            finder.secure_origins.extend(
                ("*", host, "*") for host in opts.trusted_hosts)
Example #10
def _get_html_page(link, session=None):
    if session is None:
        raise TypeError(
            "_get_html_page() missing 1 required keyword argument: 'session'")

    url = link.url
    url = url.split('#', 1)[0]

    # Check for VCS schemes that do not support lookup as web pages.
    from pip._internal.vcs import VcsSupport
    for scheme in VcsSupport.schemes:
        if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
            logger.debug('Cannot look at %s URL %s', scheme, link)
            return None

    try:
        filename = link.filename
        for bad_ext in ARCHIVE_EXTENSIONS:
            if filename.endswith(bad_ext):
                content_type = _get_content_type(url, session=session)
                if content_type.lower().startswith('text/html'):
                    break
                else:
                    logger.debug(
                        'Skipping page %s because of Content-Type: %s',
                        link,
                        content_type,
                    )
                    return

        logger.debug('Getting page %s', url)

        # Tack blank.html onto file:// URLs that point to directories
        (scheme, netloc, path, params, query, fragment) = \
            urllib_parse.urlparse(url)
        if (scheme == 'file'
                and os.path.isdir(urllib_request.url2pathname(path))):
            # add trailing slash if not present so urljoin doesn't trim
            # final segment
            if not url.endswith('/'):
                url += '/'
            url = urllib_parse.urljoin(url, 'blank.html')
            logger.debug(' file: URL is directory, getting %s', url)

        resp = session.get(
            url,
            headers={
                "Accept": "text/html",
                # We don't want to blindly return cached data for
                # /simple/, because authors generally expect that
                # twine upload && pip install will function, but if
                # they've done a pip install in the last ~10 minutes
                # it won't. Thus by setting this to zero we will not
                # blindly use any cached data. The benefit of using
                # max-age=0 instead of no-cache is that we still
                # support conditional requests, so we still minimize
                # traffic sent in cases where the page hasn't changed
                # at all; we just always incur the round trip for the
                # conditional GET now instead of only once per
                # 10 minutes.
                # For more information, please see pypa/pip#5670.
                "Cache-Control": "max-age=0",
            },
        )
        resp.raise_for_status()

        # The check for archives above only works if the url ends with
        # something that looks like an archive. However that is not a
        # requirement of a URL. Unless we issue a HEAD request on every
        # url we cannot know ahead of time for sure if something is HTML
        # or not. However we can check after we've downloaded it.
        content_type = resp.headers.get('Content-Type', 'unknown')
        if not content_type.lower().startswith("text/html"):
            logger.debug(
                'Skipping page %s because of Content-Type: %s',
                link,
                content_type,
            )
            return

        inst = HTMLPage(resp.content, resp.url, resp.headers)
    except HTTPError as exc:
        _handle_get_page_fail(link, exc, url)
    except SSLError as exc:
        reason = "There was a problem confirming the ssl certificate: "
        reason += str(exc)
        _handle_get_page_fail(link, reason, url, meth=logger.info)
    except requests.ConnectionError as exc:
        _handle_get_page_fail(link, "connection error: %s" % exc, url)
    except requests.Timeout:
        _handle_get_page_fail(link, "timed out", url)
    else:
        return inst
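Every failure branch above funnels into _handle_get_page_fail, which only logs and lets the caller fall through to returning None. A sketch consistent with the call sites (the default logging method is an assumption, and logger is the module-level logger the snippets already use):

def _handle_get_page_fail(link, reason, url=None, meth=None):
    # Log why the page was skipped; the caller returns None and moves on (sketch).
    if meth is None:
        meth = logger.debug
    meth("Could not fetch URL %s: %s - skipping", link, reason)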
Example #11
    def get_page(cls, link, skip_archives=True, session=None):
        if session is None:
            raise TypeError(
                "get_page() missing 1 required keyword argument: 'session'")

        url = link.url
        url = url.split('#', 1)[0]

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug('Cannot look at %s URL %s', scheme, link)
                return None

        try:
            if skip_archives:
                filename = link.filename
                for bad_ext in ARCHIVE_EXTENSIONS:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(
                            url,
                            session=session,
                        )
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug(
                                'Skipping page %s because of Content-Type: %s',
                                link,
                                content_type,
                            )
                            return

            logger.debug('Getting page %s', url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = \
                urllib_parse.urlparse(url)
            if (scheme == 'file'
                    and os.path.isdir(urllib_request.url2pathname(path))):
                # add trailing slash if not present so urljoin doesn't trim
                # final segment
                if not url.endswith('/'):
                    url += '/'
                url = urllib_parse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s', url)

            resp = session.get(
                url,
                headers={
                    "Accept": "text/html",
                    "Cache-Control": "max-age=600",
                },
            )
            resp.raise_for_status()

            # The check for archives above only works if the url ends with
            # something that looks like an archive. However that is not a
            # requirement of a URL. Unless we issue a HEAD request on every
            # url we cannot know ahead of time for sure if something is HTML
            # or not. However we can check after we've downloaded it.
            content_type = resp.headers.get('Content-Type', 'unknown')
            if not content_type.lower().startswith("text/html"):
                logger.debug(
                    'Skipping page %s because of Content-Type: %s',
                    link,
                    content_type,
                )
                return

            inst = cls(resp.content, resp.url, resp.headers)
        except requests.HTTPError as exc:
            cls._handle_fail(link, exc, url)
        except SSLError as exc:
            reason = ("There was a problem confirming the ssl certificate: "
                      "%s" % exc)
            cls._handle_fail(link, reason, url, meth=logger.info)
        except requests.ConnectionError as exc:
            cls._handle_fail(link, "connection error: %s" % exc, url)
        except requests.Timeout:
            cls._handle_fail(link, "timed out", url)
        else:
            return inst
Example #12
def _url_for_path(self, path):
    # type: (str) -> str
    return urllib_parse.urljoin(self.url, path)
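This one-liner leans on urljoin's resolution rules, which is also why other snippets here append a trailing slash before joining: without it, urljoin trims the last path segment of the base.

from urllib.parse import urljoin

print(urljoin("https://example.com/simple/pip/", "index.html"))
# https://example.com/simple/pip/index.html
print(urljoin("https://example.com/simple/pip", "index.html"))
# https://example.com/simple/index.html  ("pip" was trimmed)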
Example #13
def _get_html_page(link, session=None):
    if session is None:
        raise TypeError(
            "_get_html_page() missing 1 required keyword argument: 'session'"
        )

    url = link.url
    url = url.split('#', 1)[0]

    # Check for VCS schemes that do not support lookup as web pages.
    from pip._internal.vcs import VcsSupport
    for scheme in VcsSupport.schemes:
        if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
            logger.debug('Cannot look at %s URL %s', scheme, link)
            return None

    try:
        filename = link.filename
        for bad_ext in ARCHIVE_EXTENSIONS:
            if filename.endswith(bad_ext):
                content_type = _get_content_type(url, session=session)
                if content_type.lower().startswith('text/html'):
                    break
                else:
                    logger.debug(
                        'Skipping page %s because of Content-Type: %s',
                        link,
                        content_type,
                    )
                    return

        logger.debug('Getting page %s', url)

        # Tack index.html onto file:// URLs that point to directories
        (scheme, netloc, path, params, query, fragment) = \
            urllib_parse.urlparse(url)
        if (scheme == 'file' and
                os.path.isdir(urllib_request.url2pathname(path))):
            # add trailing slash if not present so urljoin doesn't trim
            # final segment
            if not url.endswith('/'):
                url += '/'
            url = urllib_parse.urljoin(url, 'index.html')
            logger.debug(' file: URL is directory, getting %s', url)

        resp = session.get(
            url,
            headers={
                "Accept": "text/html",
                # We don't want to blindly return cached data for
                # /simple/, because authors generally expect that
                # twine upload && pip install will function, but if
                # they've done a pip install in the last ~10 minutes
                # it won't. Thus by setting this to zero we will not
                # blindly use any cached data. The benefit of using
                # max-age=0 instead of no-cache is that we still
                # support conditional requests, so we still minimize
                # traffic sent in cases where the page hasn't changed
                # at all; we just always incur the round trip for the
                # conditional GET now instead of only once per
                # 10 minutes.
                # For more information, please see pypa/pip#5670.
                "Cache-Control": "max-age=0",
            },
        )
        resp.raise_for_status()

        # The check for archives above only works if the url ends with
        # something that looks like an archive. However that is not a
        # requirement of a URL. Unless we issue a HEAD request on every
        # url we cannot know ahead of time for sure if something is HTML
        # or not. However we can check after we've downloaded it.
        content_type = resp.headers.get('Content-Type', 'unknown')
        if not content_type.lower().startswith("text/html"):
            logger.debug(
                'Skipping page %s because of Content-Type: %s',
                link,
                content_type,
            )
            return

        inst = HTMLPage(resp.content, resp.url, resp.headers)
    except requests.HTTPError as exc:
        _handle_get_page_fail(link, exc, url)
    except SSLError as exc:
        reason = "There was a problem confirming the ssl certificate: "
        reason += str(exc)
        _handle_get_page_fail(link, reason, url, meth=logger.info)
    except requests.ConnectionError as exc:
        _handle_get_page_fail(link, "connection error: %s" % exc, url)
    except requests.Timeout:
        _handle_get_page_fail(link, "timed out", url)
    else:
        return inst
Example #14
def url_to_path(self, path):
    return urllib_parse.urljoin(self.url, path)
Example #15
        logger.warning('Cannot look at %s URL %s because it does not support '
                       'lookup as web pages.', vcs_scheme, link)
        return None

    # Tack index.html onto file:// URLs that point to directories
    scheme, _, path, _, _, _ = urllib_parse.urlparse(url)
    if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))):
        # add trailing slash if not present so urljoin doesn't trim
        # final segment
        if not url.endswith('/'):
            url += '/'
        url = urllib_parse.urljoin(url, 'index.html')
        logger.debug(' file: URL is directory, getting %s', url)

    try:
        resp = _get_html_response(url, session=session)
    except _NotHTTP:
        logger.warning(
            'Skipping page %s because it looks like an archive, and cannot '
            'be checked by a HTTP HEAD request.', link,
        )
    except _NotHTML as exc:
        logger.warning(
            'Skipping page %s because the %s request got Content-Type: %s. '
            'The only supported Content-Type is text/html',
            link, exc.request_desc, exc.content_type,
        )
Example #16
    def get_page(cls, link, skip_archives=True, session=None):
        if session is None:
            raise TypeError(
                "get_page() missing 1 required keyword argument: 'session'"
            )

        url = link.url
        url = url.split('#', 1)[0]

        # Check for VCS schemes that do not support lookup as web pages.
        from pip._internal.vcs import VcsSupport
        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
                logger.debug('Cannot look at %s URL %s', scheme, link)
                return None

        try:
            if skip_archives:
                filename = link.filename
                for bad_ext in ARCHIVE_EXTENSIONS:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(
                            url, session=session,
                        )
                        if content_type.lower().startswith('text/html'):
                            break
                        else:
                            logger.debug(
                                'Skipping page %s because of Content-Type: %s',
                                link,
                                content_type,
                            )
                            return

            logger.debug('Getting page %s', url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = \
                urllib_parse.urlparse(url)
            if (scheme == 'file' and
                    os.path.isdir(urllib_request.url2pathname(path))):
                # add trailing slash if not present so urljoin doesn't trim
                # final segment
                if not url.endswith('/'):
                    url += '/'
                url = urllib_parse.urljoin(url, 'index.html')
                logger.debug(' file: URL is directory, getting %s', url)

            resp = session.get(
                url,
                headers={
                    "Accept": "text/html",
                    "Cache-Control": "max-age=600",
                },
            )
            resp.raise_for_status()

            # The check for archives above only works if the url ends with
            # something that looks like an archive. However that is not a
            # requirement of a URL. Unless we issue a HEAD request on every
            # url we cannot know ahead of time for sure if something is HTML
            # or not. However we can check after we've downloaded it.
            content_type = resp.headers.get('Content-Type', 'unknown')
            if not content_type.lower().startswith("text/html"):
                logger.debug(
                    'Skipping page %s because of Content-Type: %s',
                    link,
                    content_type,
                )
                return

            inst = cls(resp.content, resp.url, resp.headers)
        except requests.HTTPError as exc:
            cls._handle_fail(link, exc, url)
        except SSLError as exc:
            reason = "There was a problem confirming the ssl certificate: "
            reason += str(exc)
            cls._handle_fail(link, reason, url, meth=logger.info)
        except requests.ConnectionError as exc:
            cls._handle_fail(link, "connection error: %s" % exc, url)
        except requests.Timeout:
            cls._handle_fail(link, "timed out", url)
        else:
            return inst
Example #17
def process_line(line, filename, line_number, finder=None, comes_from=None,
                 options=None, session=None, wheel_cache=None,
                 constraint=False):
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    """
    parser = build_parser()
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        # `finder.format_control` will be updated during parsing
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    opts, _ = parser.parse_args(shlex.split(options_str), defaults)

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r', filename, line_number)

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        yield InstallRequirement.from_line(
            args_str, line_comes_from, constraint=constraint,
            isolated=isolated, options=req_options, wheel_cache=wheel_cache
        )

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        default_vcs = options.default_vcs if options else None
        yield InstallRequirement.from_editable(
            opts.editables[0], comes_from=line_comes_from,
            constraint=constraint, default_vcs=default_vcs, isolated=isolated,
            wheel_cache=wheel_cache
        )

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_dir = os.path.dirname(filename)
            req_path = os.path.join(req_dir, req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        parser = parse_requirements(
            req_path, finder, comes_from, options, session,
            constraint=nested_constraint, wheel_cache=wheel_cache
        )
        for req in parser:
            yield req

    # set finder options
    elif finder:
        if opts.index_url:
            finder.index_urls = [opts.index_url]
        if opts.use_wheel is False:
            finder.use_wheel = False
            pip.index.fmt_ctl_no_use_wheel(finder.format_control)
        if opts.no_index is True:
            finder.index_urls = []
        if opts.allow_all_external:
            finder.allow_all_external = opts.allow_all_external
        if opts.extra_index_urls:
            finder.index_urls.extend(opts.extra_index_urls)
        if opts.allow_external:
            finder.allow_external |= set(
                [normalize_name(v).lower() for v in opts.allow_external])
        if opts.allow_unverified:
            # Remove after 7.0
            finder.allow_unverified |= set(
                [normalize_name(v).lower() for v in opts.allow_unverified])
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            finder.find_links.append(value)
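The SCHEME_RE test above decides whether a nested -r/-c path is resolved as a URL or as a filesystem path. A regex consistent with that use; the exact pattern is an assumption:

import re

# Assumed shape of SCHEME_RE: does the string start with a URL scheme?
SCHEME_RE = re.compile(r'^(http|https|file):', re.I)

print(bool(SCHEME_RE.search('https://example.com/reqs.txt')))  # True
print(bool(SCHEME_RE.search('requirements/base.txt')))         # False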
Example #18
    def get_page(cls, link, skip_archives=True, session=None):
        if session is None:
            raise TypeError("get_page() missing 1 required keyword argument: 'session'")

        url = link.url
        url = url.split("#", 1)[0]

        # Check for VCS schemes that do not support lookup as web pages.
        from pip.vcs import VcsSupport

        for scheme in VcsSupport.schemes:
            if url.lower().startswith(scheme) and url[len(scheme)] in "+:":
                logger.debug("Cannot look at %s URL %s", scheme, link)
                return None

        try:
            if skip_archives:
                filename = link.filename
                for bad_ext in [".tar", ".tar.gz", ".tar.bz2", ".tgz", ".zip"]:
                    if filename.endswith(bad_ext):
                        content_type = cls._get_content_type(url, session=session)
                        if content_type.lower().startswith("text/html"):
                            break
                        else:
                            logger.debug("Skipping page %s because of Content-Type: %s", link, content_type)
                            return

            logger.debug("Getting page %s", url)

            # Tack index.html onto file:// URLs that point to directories
            (scheme, netloc, path, params, query, fragment) = urllib_parse.urlparse(url)
            if scheme == "file" and os.path.isdir(urllib_request.url2pathname(path)):
                # add trailing slash if not present so urljoin doesn't trim
                # final segment
                if not url.endswith("/"):
                    url += "/"
                url = urllib_parse.urljoin(url, "index.html")
                logger.debug(" file: URL is directory, getting %s", url)

            resp = session.get(url, headers={"Accept": "text/html", "Cache-Control": "max-age=600"})
            resp.raise_for_status()

            # The check for archives above only works if the url ends with
            #   something that looks like an archive. However that is not a
            #   requirement of a URL. Unless we issue a HEAD request on every
            #   url we cannot know ahead of time for sure if something is HTML
            #   or not. However we can check after we've downloaded it.
            content_type = resp.headers.get("Content-Type", "unknown")
            if not content_type.lower().startswith("text/html"):
                logger.debug("Skipping page %s because of Content-Type: %s", link, content_type)
                return

            inst = cls(resp.content, resp.url, resp.headers, trusted=link.trusted)
        except requests.HTTPError as exc:
            level = 2 if exc.response.status_code == 404 else 1
            cls._handle_fail(link, exc, url, level=level)
        except requests.ConnectionError as exc:
            cls._handle_fail(link, "connection error: %s" % exc, url)
        except requests.Timeout:
            cls._handle_fail(link, "timed out", url)
        except SSLError as exc:
            reason = "There was a problem confirming the ssl certificate: " "%s" % exc
            cls._handle_fail(link, reason, url, level=2, meth=logger.info)
        else:
            return inst
Example #19
def process_line(
    line,  # type: Text
    filename,  # type: str
    line_number,  # type: int
    finder=None,  # type: Optional[PackageFinder]
    comes_from=None,  # type: Optional[str]
    options=None,  # type: Optional[optparse.Values]
    session=None,  # type: Optional[PipSession]
    wheel_cache=None,  # type: Optional[WheelCache]
    use_pep517=None,  # type: Optional[bool]
    constraint=False  # type: bool
):
    # type: (...) -> Iterator[InstallRequirement]
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    :param options: OptionParser options that we may update
    """
    parser = build_parser(line)
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    # Prior to 2.7.3, shlex cannot deal with unicode entries
    if sys.version_info < (2, 7, 3):
        # https://github.com/python/mypy/issues/1174
        options_str = options_str.encode('utf8')  # type: ignore
    # https://github.com/python/mypy/issues/1174
    opts, _ = parser.parse_args(
        shlex.split(options_str), defaults)  # type: ignore

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r', filename, line_number,
    )

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        yield install_req_from_line(
            args_str, line_comes_from, constraint=constraint,
            use_pep517=use_pep517,
            isolated=isolated, options=req_options, wheel_cache=wheel_cache
        )

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        yield install_req_from_editable(
            opts.editables[0], comes_from=line_comes_from,
            use_pep517=use_pep517,
            constraint=constraint, isolated=isolated, wheel_cache=wheel_cache
        )

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        parsed_reqs = parse_requirements(
            req_path, finder, comes_from, options, session,
            constraint=nested_constraint, wheel_cache=wheel_cache
        )
        for req in parsed_reqs:
            yield req

    # percolate hash-checking option upward
    elif opts.require_hashes:
        options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        if opts.index_url:
            finder.index_urls = [opts.index_url]
        if opts.no_index is True:
            finder.index_urls = []
        if opts.extra_index_urls:
            finder.index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            finder.find_links.append(value)
        if opts.pre:
            finder.allow_all_prereleases = True
        if opts.trusted_hosts:
            finder.secure_origins.extend(
                ("*", host, "*") for host in opts.trusted_hosts)
Example #20
def _get_html_page(link, session=None):
    # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
    if session is None:
        raise TypeError(
            "_get_html_page() missing 1 required keyword argument: 'session'")

    url = link.url.split("#", 1)[0]

    # Check for VCS schemes that do not support lookup as web pages.
    vcs_scheme = _match_vcs_scheme(url)
    if vcs_scheme:
        logger.warning(
            "Cannot look at %s URL %s because it does not support "
            "lookup as web pages.",
            vcs_scheme,
            link,
        )
        return None

    # Tack index.html onto file:// URLs that point to directories
    scheme, _, path, _, _, _ = urllib_parse.urlparse(url)
    if scheme == "file" and os.path.isdir(urllib_request.url2pathname(path)):
        # add trailing slash if not present so urljoin doesn't trim
        # final segment
        if not url.endswith("/"):
            url += "/"
        url = urllib_parse.urljoin(url, "index.html")
        logger.debug(" file: URL is directory, getting %s", url)

    try:
        resp = _get_html_response(url, session=session)
    except _NotHTTP:
        logger.warning(
            "Skipping page %s because it looks like an archive, and cannot "
            "be checked by a HTTP HEAD request.",
            link,
        )
    except _NotHTML as exc:
        logger.warning(
            "Skipping page %s because the %s request got Content-Type: %s."
            "The only supported Content-Type is text/html",
            link,
            exc.request_desc,
            exc.content_type,
        )
    except NetworkConnectionError as exc:
        _handle_get_page_fail(link, exc)
    except RetryError as exc:
        _handle_get_page_fail(link, exc)
    except SSLError as exc:
        reason = "There was a problem confirming the ssl certificate: "
        reason += str(exc)
        _handle_get_page_fail(link, reason, meth=logger.info)
    except requests.ConnectionError as exc:
        _handle_get_page_fail(link, "connection error: {}".format(exc))
    except requests.Timeout:
        _handle_get_page_fail(link, "timed out")
    else:
        return _make_html_page(resp,
                               cache_link_parsing=link.cache_link_parsing)
    return None
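_NotHTTP and _NotHTML are small signalling exceptions raised by _get_html_response; a sketch matching how the handlers above read them (the constructor signature is inferred from the exc.request_desc and exc.content_type accesses):

class _NotHTTP(Exception):
    # The URL looks like an archive but is not http/https, so it
    # cannot be probed with a HEAD request (sketch).
    pass


class _NotHTML(Exception):
    # The response was not text/html (sketch).
    def __init__(self, content_type, request_desc):
        super(_NotHTML, self).__init__(content_type, request_desc)
        self.content_type = content_type
        self.request_desc = request_desc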
Example #21
def parse_requirements(filename,
                       finder=None,
                       comes_from=None,
                       options=None,
                       session=None):
    if session is None:
        raise TypeError(
            "parse_requirements() missing 1 required keyword argument: "
            "'session'")

    skip_match = None
    skip_regex = options.skip_requirements_regex if options else None
    if skip_regex:
        skip_match = re.compile(skip_regex)
    reqs_file_dir = os.path.dirname(os.path.abspath(filename))
    filename, content = get_file_content(
        filename,
        comes_from=comes_from,
        session=session,
    )
    for line_number, line in enumerate(content.splitlines(), 1):
        line = line.strip()

        # Strip comments and any whitespace before them
        line = re.sub(r"(^|\s)+#.*$", "", line)

        if not line:
            continue
        if skip_match and skip_match.search(line):
            continue
        if line.startswith(('-r', '--requirement')):
            req_url = _remove_prefixes(line, '-r', '--requirement')
            if _scheme_re.search(filename):
                # Relative to a URL
                req_url = urllib_parse.urljoin(filename, req_url)
            elif not _scheme_re.search(req_url):
                req_url = os.path.join(os.path.dirname(filename), req_url)
            for item in parse_requirements(req_url,
                                           finder,
                                           comes_from=filename,
                                           options=options,
                                           session=session):
                yield item
        elif line.startswith(('-Z', '--always-unzip')):
            # No longer used, but previously these were used in
            # requirement files, so we'll ignore.
            pass
        elif line.startswith(('-f', '--find-links')):
            find_links = _remove_prefixes(line, '-f', '--find-links')
            # FIXME: it would be nice to keep track of the source of
            # the find_links:
            # support a find-links local path relative to a requirements file
            relative_to_reqs_file = os.path.join(reqs_file_dir, find_links)
            if os.path.exists(relative_to_reqs_file):
                find_links = relative_to_reqs_file
            if finder:
                finder.find_links.append(find_links)
        elif line.startswith(('-i', '--index-url')):
            index_url = _remove_prefixes(line, '-i', '--index-url')
            if finder:
                finder.index_urls = [index_url]
        elif line.startswith('--extra-index-url'):
            line = _remove_prefix(line, '--extra-index-url')
            if finder:
                finder.index_urls.append(line)
        elif line.startswith('--use-wheel'):
            # Default in 1.5
            pass
        elif line.startswith('--no-use-wheel'):
            if finder:
                finder.use_wheel = False
        elif line.startswith('--no-index'):
            if finder:
                finder.index_urls = []
        elif line.startswith("--allow-external"):
            line = _remove_prefix(line, '--allow-external')
            if finder:
                finder.allow_external |= set([normalize_name(line).lower()])
        elif line.startswith("--allow-all-external"):
            if finder:
                finder.allow_all_external = True
        # Remove in 7.0
        elif line.startswith("--no-allow-external"):
            pass
        # Remove in 7.0
        elif line.startswith("--no-allow-insecure"):
            pass
        # Remove after 7.0
        elif line.startswith("--allow-insecure"):
            line = _remove_prefix(line, '--allow-insecure')
            if finder:
                finder.allow_unverified |= set([normalize_name(line).lower()])
        elif line.startswith("--allow-unverified"):
            line = _remove_prefix(line, '--allow-unverified')
            if finder:
                finder.allow_unverified |= set([normalize_name(line).lower()])
        else:
            comes_from = '-r %s (line %s)' % (filename, line_number)
            if line.startswith(('-e', '--editable')):
                editable = _remove_prefixes(line, '-e', '--editable')
                req = InstallRequirement.from_editable(
                    editable,
                    comes_from=comes_from,
                    default_vcs=options.default_vcs if options else None,
                    isolated=options.isolated_mode if options else False,
                )
            else:
                req = InstallRequirement.from_line(
                    line,
                    comes_from,
                    isolated=options.isolated_mode if options else False,
                )
            yield req
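_remove_prefix and _remove_prefixes strip the option flag (and any separator) off the line before the value is used; one way these helpers might look, as a hypothetical sketch:

def _remove_prefix(line, prefix):
    # Strip one option spelling and any '=' or space separator (sketch).
    return line[len(prefix):].lstrip('= ').strip()


def _remove_prefixes(line, short_opt, long_opt):
    # Strip whichever of the two option spellings the line starts with (sketch).
    if line.startswith(long_opt):
        return _remove_prefix(line, long_opt)
    return _remove_prefix(line, short_opt)

print(_remove_prefixes('-r requirements/base.txt', '-r', '--requirement'))
# requirements/base.txt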
Example #22
def process_line(
        line,  # type: Text
        filename,  # type: str
        line_number,  # type: int
        finder=None,  # type: Optional[PackageFinder]
        comes_from=None,  # type: Optional[str]
        options=None,  # type: Optional[optparse.Values]
        session=None,  # type: Optional[PipSession]
        wheel_cache=None,  # type: Optional[WheelCache]
        use_pep517=None,  # type: Optional[bool]
        constraint=False,  # type: bool
):
    # type: (...) -> Iterator[InstallRequirement]
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    :param options: OptionParser options that we may update
    """
    parser = build_parser(line)
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    # Prior to 2.7.3, shlex cannot deal with unicode entries
    if sys.version_info < (2, 7, 3):
        # https://github.com/python/mypy/issues/1174
        options_str = options_str.encode('utf8')  # type: ignore
    # https://github.com/python/mypy/issues/1174
    opts, _ = parser.parse_args(shlex.split(options_str),
                                defaults)  # type: ignore

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r',
        filename,
        line_number,
    )

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        line_source = 'line {} of {}'.format(line_number, filename)
        yield install_req_from_line(
            args_str,
            comes_from=line_comes_from,
            use_pep517=use_pep517,
            isolated=isolated,
            options=req_options,
            wheel_cache=wheel_cache,
            constraint=constraint,
            line_source=line_source,
        )

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        yield install_req_from_editable(opts.editables[0],
                                        comes_from=line_comes_from,
                                        use_pep517=use_pep517,
                                        constraint=constraint,
                                        isolated=isolated,
                                        wheel_cache=wheel_cache)

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        parsed_reqs = parse_requirements(req_path,
                                         finder,
                                         comes_from,
                                         options,
                                         session,
                                         constraint=nested_constraint,
                                         wheel_cache=wheel_cache)
        for req in parsed_reqs:
            yield req

    # percolate hash-checking option upward
    elif opts.require_hashes:
        options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        find_links = finder.find_links
        index_urls = finder.index_urls
        if opts.index_url:
            index_urls = [opts.index_url]
        if opts.no_index is True:
            index_urls = []
        if opts.extra_index_urls:
            index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            find_links.append(value)

        search_scope = SearchScope(
            find_links=find_links,
            index_urls=index_urls,
        )
        finder.search_scope = search_scope

        if opts.pre:
            finder.set_allow_all_prereleases()
        for host in opts.trusted_hosts or []:
            source = 'line {} of {}'.format(line_number, filename)
            finder.add_trusted_host(host, source=source)
Example #23
def _url_for_path(self, path):
    return urllib_parse.urljoin(self.url, path)
Example #24
def process_line(line,
                 filename,
                 line_number,
                 finder=None,
                 comes_from=None,
                 options=None,
                 session=None,
                 wheel_cache=None,
                 constraint=False):
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    """
    parser = build_parser()
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        # `finder.format_control` will be updated during parsing
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    opts, _ = parser.parse_args(shlex.split(options_str), defaults)

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % ('-c' if constraint else '-r',
                                           filename, line_number)

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        yield InstallRequirement.from_line(args_str,
                                           line_comes_from,
                                           constraint=constraint,
                                           isolated=isolated,
                                           options=req_options,
                                           wheel_cache=wheel_cache)

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        default_vcs = options.default_vcs if options else None
        yield InstallRequirement.from_editable(opts.editables[0],
                                               comes_from=line_comes_from,
                                               constraint=constraint,
                                               default_vcs=default_vcs,
                                               isolated=isolated,
                                               wheel_cache=wheel_cache)

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_dir = os.path.dirname(filename)
            req_path = os.path.join(req_dir, req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        parser = parse_requirements(req_path,
                                    finder,
                                    comes_from,
                                    options,
                                    session,
                                    constraint=nested_constraint,
                                    wheel_cache=wheel_cache)
        for req in parser:
            yield req

    # set finder options
    elif finder:
        if opts.index_url:
            finder.index_urls = [opts.index_url]
        if opts.use_wheel is False:
            finder.use_wheel = False
            pip.index.fmt_ctl_no_use_wheel(finder.format_control)
        if opts.no_index is True:
            finder.index_urls = []
        if opts.allow_all_external:
            finder.allow_all_external = opts.allow_all_external
        if opts.extra_index_urls:
            finder.index_urls.extend(opts.extra_index_urls)
        if opts.allow_external:
            finder.allow_external |= set(
                [normalize_name(v).lower() for v in opts.allow_external])
        if opts.allow_unverified:
            # Remove after 7.0
            finder.allow_unverified |= set(
                [normalize_name(v).lower() for v in opts.allow_unverified])
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            finder.find_links.append(value)
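
process_line is a generator, so a parse_requirements-style caller has to iterate it even for option-only lines, or the finder updates above never run. A hedged sketch of such a driver, assuming a get_file_content helper like the one used elsewhere in these examples (names are illustrative):

def iter_requirements(filename, finder=None, options=None, session=None):
    _, content = get_file_content(filename, session=session)
    for line_number, line in enumerate(content.splitlines(), 1):
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        for req in process_line(line, filename, line_number,
                                finder=finder, options=options,
                                session=session):
            yield req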
Example #25
0
def process_line(line, filename, line_number, finder=None, comes_from=None,
                 options=None, session=None, wheel_cache=None,
                 constraint=False):
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    :param options: OptionParser options that we may update
    """
    parser = build_parser()
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        # `finder.format_control` will be updated during parsing
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    if sys.version_info < (2, 7, 3):
        # Prior to 2.7.3, shlex cannot deal with unicode entries
        options_str = options_str.encode('utf8')
    opts, _ = parser.parse_args(shlex.split(options_str), defaults)

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r', filename, line_number)

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        yield InstallRequirement.from_line(
            args_str, line_comes_from, constraint=constraint,
            isolated=isolated, options=req_options, wheel_cache=wheel_cache
        )

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        default_vcs = options.default_vcs if options else None
        yield InstallRequirement.from_editable(
            opts.editables[0], comes_from=line_comes_from,
            constraint=constraint, default_vcs=default_vcs, isolated=isolated,
            wheel_cache=wheel_cache
        )

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        parser = parse_requirements(
            req_path, finder, comes_from, options, session,
            constraint=nested_constraint, wheel_cache=wheel_cache
        )
        for req in parser:
            yield req

    # percolate hash-checking option upward
    elif opts.require_hashes:
        options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        if opts.allow_external:
            warnings.warn(
                "--allow-external has been deprecated and will be removed in "
                "the future. Due to changes in the repository protocol, it no "
                "longer has any effect.",
                RemovedInPip10Warning,
            )

        if opts.allow_all_external:
            warnings.warn(
                "--allow-all-external has been deprecated and will be removed "
                "in the future. Due to changes in the repository protocol, it "
                "no longer has any effect.",
                RemovedInPip10Warning,
            )

        if opts.allow_unverified:
            warnings.warn(
                "--allow-unverified has been deprecated and will be removed "
                "in the future. Due to changes in the repository protocol, it "
                "no longer has any effect.",
                RemovedInPip10Warning,
            )

        if opts.index_url:
            finder.index_urls = [opts.index_url]
        if opts.use_wheel is False:
            finder.use_wheel = False
            pip.index.fmt_ctl_no_use_wheel(finder.format_control)
        if opts.no_index is True:
            finder.index_urls = []
        if opts.extra_index_urls:
            finder.index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            finder.find_links.append(value)
        if opts.pre:
            finder.allow_all_prereleases = True
        if opts.process_dependency_links:
            finder.process_dependency_links = True
        if opts.trusted_hosts:
            finder.secure_origins.extend(
                ("*", host, "*") for host in opts.trusted_hosts)
Example #26
0
def parse_requirements(filename, finder=None, comes_from=None, options=None,
                       session=None):
    if session is None:
        raise TypeError(
            "parse_requirements() missing 1 required keyword argument: "
            "'session'"
        )

    skip_match = None
    skip_regex = options.skip_requirements_regex if options else None
    if skip_regex:
        skip_match = re.compile(skip_regex)
    reqs_file_dir = os.path.dirname(os.path.abspath(filename))
    filename, content = get_file_content(
        filename,
        comes_from=comes_from,
        session=session,
    )
    for line_number, line in enumerate(content.splitlines(), 1):
        line = line.strip()

        # Remove comments and any whitespace preceding them
        line = re.sub(r"(^|\s)+#.*$", "", line)

        if not line:
            continue
        if skip_match and skip_match.search(line):
            continue
        if line.startswith(('-r', '--requirement')):
            req_url = _remove_prefixes(line, '-r', '--requirement')
            if _scheme_re.search(filename):
                # Relative to a URL
                req_url = urllib_parse.urljoin(filename, req_url)
            elif not _scheme_re.search(req_url):
                req_url = os.path.join(os.path.dirname(filename), req_url)
            for item in parse_requirements(
                    req_url, finder,
                    comes_from=filename,
                    options=options,
                    session=session):
                yield item
        elif line.startswith(('-Z', '--always-unzip')):
            # No longer used, but previously these were used in
            # requirement files, so we'll ignore.
            pass
        elif line.startswith(('-f', '--find-links')):
            find_links = _remove_prefixes(line, '-f', '--find-links')
            # FIXME: it would be nice to keep track of the source of
            # the find_links:
            # support a find-links local path relative to a requirements file
            relative_to_reqs_file = os.path.join(reqs_file_dir, find_links)
            if os.path.exists(relative_to_reqs_file):
                find_links = relative_to_reqs_file
            if finder:
                finder.find_links.append(find_links)
        elif line.startswith(('-i', '--index-url')):
            index_url = _remove_prefixes(line, '-i', '--index-url')
            if finder:
                finder.index_urls = [index_url]
        elif line.startswith('--extra-index-url'):
            line = _remove_prefix(line, '--extra-index-url')
            if finder:
                finder.index_urls.append(line)
        elif line.startswith('--use-wheel'):
            # Default in 1.5
            pass
        elif line.startswith('--no-use-wheel'):
            if finder:
                finder.use_wheel = False
        elif line.startswith('--no-index'):
            if finder:
                finder.index_urls = []
        elif line.startswith("--allow-external"):
            line = _remove_prefix(line, '--allow-external')
            if finder:
                finder.allow_external |= set([normalize_name(line).lower()])
        elif line.startswith("--allow-all-external"):
            if finder:
                finder.allow_all_external = True
        # Remove in 7.0
        elif line.startswith("--no-allow-external"):
            pass
        # Remove in 7.0
        elif line.startswith("--no-allow-insecure"):
            pass
        # Remove after 7.0
        elif line.startswith("--allow-insecure"):
            line = _remove_prefix(line, '--allow-insecure')
            if finder:
                finder.allow_unverified |= set([normalize_name(line).lower()])
        elif line.startswith("--allow-unverified"):
            line = _remove_prefix(line, '--allow-unverified')
            if finder:
                finder.allow_unverified |= set([normalize_name(line).lower()])
        else:
            comes_from = '-r %s (line %s)' % (filename, line_number)
            if line.startswith(('-e', '--editable')):
                editable = _remove_prefixes(line, '-e', '--editable')
                req = InstallRequirement.from_editable(
                    editable,
                    comes_from=comes_from,
                    default_vcs=options.default_vcs if options else None,
                    isolated=options.isolated_mode if options else False,
                )
            else:
                req = InstallRequirement.from_line(
                    line,
                    comes_from,
                    isolated=options.isolated_mode if options else False,
                )
            yield req
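
Because this variant raises TypeError without a session, a typical call looks like the following hedged sketch (the PipSession import path is an assumption; it has moved between pip releases):

from pip.download import PipSession  # assumed location for this pip era

session = PipSession()
for req in parse_requirements('requirements.txt', session=session):
    print(req)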
Example #27
0
 def _url_for_path(self, path):
     # type: (str) -> str
     return urllib_parse.urljoin(self.url, path)
Example #28
0
        # Remove comments from file
        line = re.sub(r"(^|\s)#.*$", "", line)

        if not line or line.startswith('#'):
            continue
        if skip_match and skip_match.search(line):
            continue
        if line.startswith('-r') or line.startswith('--requirement'):
            if line.startswith('-r'):
                req_url = line[2:].strip()
            else:
                req_url = line[len('--requirement'):].strip().strip('=')
            if _scheme_re.search(filename):
                # Relative to a URL
                req_url = urllib_parse.urljoin(filename, req_url)
            elif not _scheme_re.search(req_url):
                req_url = os.path.join(os.path.dirname(filename), req_url)
            for item in parse_requirements(
                    req_url, finder,
                    comes_from=filename,
                    options=options,
                    session=session):
                yield item
        elif line.startswith(('-Z', '--always-unzip')):
            # No longer used, but previously these were used in
            # requirement files, so we'll ignore.
            pass
        elif line.startswith(('-f', '--find-links')):
            find_links = _remove_prefixes(line, '-f', '--find-links')
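
Several snippets above call _remove_prefix/_remove_prefixes without defining them. A hypothetical sketch of what such helpers might do, modeled on the inline handling visible in the older branch of this example (not pip's actual implementation):

def _remove_prefix(line, prefix):
    # drop the option name, then any '=' separator and surrounding spaces
    return line[len(prefix):].strip().lstrip('=').strip()

def _remove_prefixes(line, short_prefix, long_prefix):
    if line.startswith(long_prefix):
        return _remove_prefix(line, long_prefix)
    return _remove_prefix(line, short_prefix)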
Example #29
0
 def _url_for_path(self, path):
     return urllib_parse.urljoin(self.url, path)
Example #30
0
def process_line(
    line,  # type: Text
    filename,  # type: str
    line_number,  # type: int
    finder=None,  # type: Optional[PackageFinder]
    comes_from=None,  # type: Optional[str]
    options=None,  # type: Optional[optparse.Values]
    session=None,  # type: Optional[PipSession]
    wheel_cache=None,  # type: Optional[WheelCache]
    use_pep517=None,  # type: Optional[bool]
    constraint=False,  # type: bool
):
    # type: (...) -> Iterator[InstallRequirement]
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    :param options: OptionParser options that we may update
    """
    line_parser = get_line_parser(finder)
    try:
        args_str, opts = line_parser(line)
    except OptionParsingError as e:
        # add offending line
        msg = 'Invalid requirement: %s\n%s' % (line, e.msg)
        raise RequirementsFileParseError(msg)

    # parse a nested requirements file
    if (
        not args_str and
        not opts.editables and
        (opts.requirements or opts.constraints)
    ):
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        parsed_reqs = parse_requirements(
            req_path, finder, comes_from, options, session,
            constraint=nested_constraint, wheel_cache=wheel_cache
        )
        for req in parsed_reqs:
            yield req
        return

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r', filename, line_number,
    )

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        line_source = 'line {} of {}'.format(line_number, filename)
        yield install_req_from_line(
            args_str,
            comes_from=line_comes_from,
            use_pep517=use_pep517,
            isolated=isolated,
            options=req_options,
            wheel_cache=wheel_cache,
            constraint=constraint,
            line_source=line_source,
        )

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        yield install_req_from_editable(
            opts.editables[0], comes_from=line_comes_from,
            use_pep517=use_pep517,
            constraint=constraint, isolated=isolated, wheel_cache=wheel_cache
        )

    # percolate hash-checking option upward
    elif opts.require_hashes:
        options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        find_links = finder.find_links
        index_urls = finder.index_urls
        if opts.index_url:
            index_urls = [opts.index_url]
        if opts.no_index is True:
            index_urls = []
        if opts.extra_index_urls:
            index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            find_links.append(value)

        search_scope = SearchScope(
            find_links=find_links,
            index_urls=index_urls,
        )
        finder.search_scope = search_scope

        if opts.pre:
            finder.set_allow_all_prereleases()
        for host in opts.trusted_hosts or []:
            source = 'line {} of {}'.format(line_number, filename)
            session.add_trusted_host(host, source=source)
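
As with the earlier variants, this process_line is a generator, so even a pure option line only updates the finder once the generator is drained. A hedged usage sketch, assuming finder, options, and session objects constructed elsewhere:

for req in process_line('--extra-index-url https://mirror.example/simple',
                        'requirements.txt', 1,
                        finder=finder, options=options, session=session):
    pass  # option-only lines yield nothing; iterating triggers the update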