def links(self):
    """Yields all links in the page"""
    for anchor in self.parsed.findall(".//a"):
        if anchor.get("href"):
            href = anchor.get("href")
            url = self.clean_link(
                urllib_parse.urljoin(self.base_url, href)
            )
            yield Link(url, self)
def path_to_url(path):
    """
    Convert a path to a file: URL.  The path will be made absolute and have
    quoted path parts.
    """
    path = os.path.normpath(os.path.abspath(path))
    url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path))
    return url
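A minimal standalone sketch of the same conversion, using the Python 3 stdlib names directly (the snippet above goes through pip's urllib_parse/urllib_request compatibility aliases); the example path is made up and the printed URL depends on the current working directory.

import os
import urllib.parse
import urllib.request

def path_to_url(path):
    """Convert a filesystem path to an absolute, percent-quoted file: URL."""
    path = os.path.normpath(os.path.abspath(path))
    return urllib.parse.urljoin('file:', urllib.request.pathname2url(path))

# e.g. file:///home/user/some%20dir/pkg-1.0.tar.gz (prefix depends on cwd)
print(path_to_url('some dir/pkg-1.0.tar.gz'))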
def links(self):
    """Yields all links in the page"""
    for anchor in self.parsed.findall(".//a"):
        if anchor.get("href"):
            href = anchor.get("href")
            url = self.clean_link(
                urllib_parse.urljoin(self.base_url, href)
            )
            pyrequire = anchor.get('data-requires-python')
            pyrequire = unescape(pyrequire) if pyrequire else None
            yield Link(url, self, requires_python=pyrequire)
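A quick illustration of the unescape step above: the data-requires-python attribute may arrive HTML-escaped in the simple-index page, so an entity-encoded value has to be turned back into a plain specifier before being attached to the Link. This assumes the Python 3 html.unescape; the snippet itself may import unescape through a compatibility shim.

from html import unescape

pyrequire = "&gt;=2.7, !=3.0.*"
print(unescape(pyrequire))  # >=2.7, !=3.0.*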
def _get_html_page(link, session=None): # type: (Link, Optional[PipSession]) -> Optional[HTMLPage] if session is None: raise TypeError( "_get_html_page() missing 1 required keyword argument: 'session'" ) url = link.url.split('#', 1)[0] # Check for VCS schemes that do not support lookup as web pages. vcs_scheme = _match_vcs_scheme(url) if vcs_scheme: logger.debug('Cannot look at %s URL %s', vcs_scheme, link) return None # Tack index.html onto file:// URLs that point to directories scheme, _, path, _, _, _ = urllib_parse.urlparse(url) if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith('/'): url += '/' url = urllib_parse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s', url) try: resp = _get_html_response(url, session=session) except _NotHTTP as exc: logger.debug( 'Skipping page %s because it looks like an archive, and cannot ' 'be checked by HEAD.', link, ) except _NotHTML as exc: logger.debug( 'Skipping page %s because the %s request got Content-Type: %s', link, exc.request_desc, exc.content_type, ) except requests.HTTPError as exc: _handle_get_page_fail(link, exc) except RetryError as exc: _handle_get_page_fail(link, exc) except SSLError as exc: reason = "There was a problem confirming the ssl certificate: " reason += str(exc) _handle_get_page_fail(link, reason, meth=logger.info) except requests.ConnectionError as exc: _handle_get_page_fail(link, "connection error: %s" % exc) except requests.Timeout: _handle_get_page_fail(link, "timed out") else: return HTMLPage(resp.content, resp.url, resp.headers) return None
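_match_vcs_scheme() is not included in this excerpt; the sketch below is an assumption about what it checks, based on the older scheme loops elsewhere in this file: a URL counts as a VCS URL when it starts with a known scheme followed by '+' or ':'. The scheme list here is hypothetical; pip derives it from its VcsSupport registry.

def _match_vcs_scheme(url):
    """Return the matching VCS scheme for url, or None (illustrative only)."""
    for scheme in ("git", "hg", "svn", "bzr"):  # assumed scheme list
        if url.lower().startswith(scheme) and url[len(scheme)] in "+:":
            return scheme
    return None

print(_match_vcs_scheme("git+https://github.com/pypa/pip.git"))  # git
print(_match_vcs_scheme("https://pypi.org/simple/pip/"))         # None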
def explicit_rel_links(self, rels=("homepage", "download")):
    """Yields all links with the given relations"""
    rels = set(rels)

    for anchor in self.parsed.findall(".//a"):
        if anchor.get("rel") and anchor.get("href"):
            found_rels = set(anchor.get("rel").split())
            # Determine the intersection between what rels were found and
            # what rels were being looked for
            if found_rels & rels:
                href = anchor.get("href")
                url = self.clean_link(
                    urllib_parse.urljoin(self.base_url, href)
                )
                yield Link(url, self, trusted=False)
def iter_links(self):
    """Yields all links in the page"""
    document = html5lib.parse(
        self.content,
        transport_encoding=_get_encoding_from_headers(self.headers),
        namespaceHTMLElements=False,
    )
    base_url = _determine_base_url(document, self.url)
    for anchor in document.findall(".//a"):
        if anchor.get("href"):
            href = anchor.get("href")
            url = _clean_link(urllib_parse.urljoin(base_url, href))
            pyrequire = anchor.get('data-requires-python')
            pyrequire = unescape(pyrequire) if pyrequire else None
            yield Link(url, self.url, requires_python=pyrequire)
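A standalone sketch of the anchor-walking loop above, calling html5lib directly. Link, _clean_link, _determine_base_url and _get_encoding_from_headers are pip internals not shown here, so this version simply yields (absolute URL, data-requires-python) pairs; the page content and base URL are made up.

import html5lib
from urllib.parse import urljoin

def iter_anchor_urls(content, base_url):
    document = html5lib.parse(content, namespaceHTMLElements=False)
    for anchor in document.findall(".//a"):
        href = anchor.get("href")
        if href:
            yield urljoin(base_url, href), anchor.get("data-requires-python")

page = b'<a href="pkg-1.0.tar.gz" data-requires-python="&gt;=3.5">pkg-1.0.tar.gz</a>'
for url, requires_python in iter_anchor_urls(page, "https://example.org/simple/pkg/"):
    print(url, requires_python)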
def links(self):
    """Yields all links in the page"""
    for anchor in self.parsed.findall(".//a"):
        if anchor.get("href"):
            href = anchor.get("href")
            url = self.clean_link(
                urllib_parse.urljoin(self.base_url, href)
            )

            # Determine if this link is internal. If that distinction
            # doesn't make sense in this context, then we don't make
            # any distinction.
            internal = None
            if self.api_version and self.api_version >= 2:
                # Only api_versions >= 2 have a distinction between
                # external and internal links
                internal = bool(
                    anchor.get("rel") and
                    "internal" in anchor.get("rel").split()
                )

            yield Link(url, self, internal=internal)
def scraped_rel_links(self):
    # Can we get rid of this horrible horrible method?
    for regex in (self._homepage_re, self._download_re):
        match = regex.search(self.content)
        if not match:
            continue
        href_match = self._href_re.search(self.content, pos=match.end())
        if not href_match:
            continue
        url = (
            href_match.group(1) or
            href_match.group(2) or
            href_match.group(3)
        )
        if not url:
            continue
        try:
            url = url.decode("ascii")
        except UnicodeDecodeError:
            continue
        url = self.clean_link(urllib_parse.urljoin(self.base_url, url))
        yield Link(url, self, trusted=False, _deprecated_regex=True)
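The _homepage_re, _download_re and _href_re attributes are not included in the snippet above; the patterns below are illustrative assumptions only, meant to show the kind of byte-level scraping the method performs against the raw page content.

import re

_homepage_re = re.compile(rb"<th>\s*home\s*page", re.I)     # assumed pattern
_download_re = re.compile(rb"<th>\s*download\s+url", re.I)  # assumed pattern
_href_re = re.compile(
    rb'href=(?:"([^"]*)"|\'([^\']*)\'|([^>\s\n]*))', re.I | re.S  # assumed
)

content = b'<tr><th>Home Page:</th><td><a href="https://example.org/">home</a></td></tr>'
match = _homepage_re.search(content)
href_match = _href_re.search(content, pos=match.end())
print(href_match.group(1))  # b'https://example.org/'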
def process_line(line, filename, line_number, finder=None, comes_from=None,
                 options=None, session=None, wheel_cache=None,
                 constraint=False):
    """Process a single requirements line; this can result in
    creating/yielding requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    :param options: OptionParser options that we may update
    """
    parser = build_parser(line)
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        # `finder.format_control` will be updated during parsing
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    if sys.version_info < (2, 7, 3):
        # Prior to 2.7.3, shlex cannot deal with unicode entries
        options_str = options_str.encode('utf8')
    opts, _ = parser.parse_args(shlex.split(options_str), defaults)

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r', filename, line_number,
    )

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        yield InstallRequirement.from_line(
            args_str, line_comes_from, constraint=constraint,
            isolated=isolated, options=req_options, wheel_cache=wheel_cache
        )

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        yield InstallRequirement.from_editable(
            opts.editables[0], comes_from=line_comes_from,
            constraint=constraint, isolated=isolated, wheel_cache=wheel_cache
        )

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        parser = parse_requirements(
            req_path, finder, comes_from, options, session,
            constraint=nested_constraint, wheel_cache=wheel_cache
        )
        for req in parser:
            yield req

    # percolate hash-checking option upward
    elif opts.require_hashes:
        options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        if opts.index_url:
            finder.index_urls = [opts.index_url]
        if opts.no_index is True:
            finder.index_urls = []
        if opts.extra_index_urls:
            finder.index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            finder.find_links.append(value)
        if opts.pre:
            finder.allow_all_prereleases = True
        if opts.process_dependency_links:
            finder.process_dependency_links = True
        if opts.trusted_hosts:
            finder.secure_origins.extend(
                ("*", host, "*") for host in opts.trusted_hosts)
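break_args_options() is a pip helper that is not part of this excerpt; the version below is an assumed reading of its contract, splitting a requirements line into the requirement text and the trailing per-line options that get fed to the option parser.

import shlex

def break_args_options(line):
    """Split 'SomeProject==1.3 --hash=...' into (args, options) strings (sketch)."""
    tokens = line.split(' ')
    args = []
    options = ''
    for i, token in enumerate(tokens):
        if token.startswith('-'):
            options = ' '.join(tokens[i:])
            break
        args.append(token)
    return ' '.join(args), options

args_str, options_str = break_args_options("SomeProject==1.3 --hash=sha256:abcd")
print(repr(args_str), shlex.split(options_str))
# 'SomeProject==1.3' ['--hash=sha256:abcd']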
def _get_html_page(link, session=None): if session is None: raise TypeError( "_get_html_page() missing 1 required keyword argument: 'session'") url = link.url url = url.split('#', 1)[0] # Check for VCS schemes that do not support lookup as web pages. from pip._internal.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %s URL %s', scheme, link) return None try: filename = link.filename for bad_ext in ARCHIVE_EXTENSIONS: if filename.endswith(bad_ext): content_type = _get_content_type(url, session=session) if content_type.lower().startswith('text/html'): break else: logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return logger.debug('Getting page %s', url) # Tack blank.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = \ urllib_parse.urlparse(url) if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith('/'): url += '/' url = urllib_parse.urljoin(url, 'blank.html') logger.debug(' file: URL is directory, getting %s', url) resp = session.get( url, headers={ "Accept": "text/html", # We don't want to blindly returned cached data for # /simple/, because authors generally expecting that # twine upload && pip install will function, but if # they've done a pip install in the last ~10 minutes # it won't. Thus by setting this to zero we will not # blindly use any cached data, however the benefit of # using max-age=0 instead of no-cache, is that we will # still support conditional requests, so we will still # minimize traffic sent in cases where the page hasn't # changed at all, we will just always incur the round # trip for the conditional GET now instead of only # once per 10 minutes. # For more information, please see pypa/pip#5670. "Cache-Control": "max-age=0", }, ) resp.raise_for_status() # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every # url we cannot know ahead of time for sure if something is HTML # or not. However we can check after we've downloaded it. content_type = resp.headers.get('Content-Type', 'unknown') if not content_type.lower().startswith("text/html"): logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return inst = HTMLPage(resp.content, resp.url, resp.headers) except HTTPError as exc: _handle_get_page_fail(link, exc, url) except SSLError as exc: reason = "There was a problem confirming the ssl certificate: " reason += str(exc) _handle_get_page_fail(link, reason, url, meth=logger.info) except requests.ConnectionError as exc: _handle_get_page_fail(link, "connection error: %s" % exc, url) except requests.Timeout: _handle_get_page_fail(link, "timed out", url) else: return inst
def get_page(cls, link, skip_archives=True, session=None): if session is None: raise TypeError( "get_page() missing 1 required keyword argument: 'session'") url = link.url url = url.split('#', 1)[0] # Check for VCS schemes that do not support lookup as web pages. from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %s URL %s', scheme, link) return None try: if skip_archives: filename = link.filename for bad_ext in ARCHIVE_EXTENSIONS: if filename.endswith(bad_ext): content_type = cls._get_content_type( url, session=session, ) if content_type.lower().startswith('text/html'): break else: logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return logger.debug('Getting page %s', url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = \ urllib_parse.urlparse(url) if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith('/'): url += '/' url = urllib_parse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s', url) resp = session.get( url, headers={ "Accept": "text/html", "Cache-Control": "max-age=600", }, ) resp.raise_for_status() # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every # url we cannot know ahead of time for sure if something is HTML # or not. However we can check after we've downloaded it. content_type = resp.headers.get('Content-Type', 'unknown') if not content_type.lower().startswith("text/html"): logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return inst = cls(resp.content, resp.url, resp.headers) except requests.HTTPError as exc: cls._handle_fail(link, exc, url) except SSLError as exc: reason = ("There was a problem confirming the ssl certificate: " "%s" % exc) cls._handle_fail(link, reason, url, meth=logger.info) except requests.ConnectionError as exc: cls._handle_fail(link, "connection error: %s" % exc, url) except requests.Timeout: cls._handle_fail(link, "timed out", url) else: return inst
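For reference, the GET that get_page() issues above boils down to something like the following, assuming a plain requests.Session in place of pip's PipSession and a hypothetical index URL.

import requests

session = requests.Session()
resp = session.get(
    "https://pypi.org/simple/pip/",
    headers={"Accept": "text/html", "Cache-Control": "max-age=600"},
)
resp.raise_for_status()
print(resp.headers.get("Content-Type", "unknown"))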
def _url_for_path(self, path):
    # type: (str) -> str
    return urllib_parse.urljoin(self.url, path)
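A small illustration of the urljoin call used here and in the snippets above: the result depends on whether the base URL ends with a slash, which is why the directory-handling code appends '/' before joining so the final path segment is not trimmed.

from urllib.parse import urljoin

print(urljoin("https://example.org/simple/pip", "index.html"))
# https://example.org/simple/index.html   (last segment replaced)
print(urljoin("https://example.org/simple/pip/", "index.html"))
# https://example.org/simple/pip/index.html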
def _get_html_page(link, session=None): if session is None: raise TypeError( "_get_html_page() missing 1 required keyword argument: 'session'" ) url = link.url url = url.split('#', 1)[0] # Check for VCS schemes that do not support lookup as web pages. from pip._internal.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %s URL %s', scheme, link) return None try: filename = link.filename for bad_ext in ARCHIVE_EXTENSIONS: if filename.endswith(bad_ext): content_type = _get_content_type(url, session=session) if content_type.lower().startswith('text/html'): break else: logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return logger.debug('Getting page %s', url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = \ urllib_parse.urlparse(url) if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith('/'): url += '/' url = urllib_parse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s', url) resp = session.get( url, headers={ "Accept": "text/html", # We don't want to blindly returned cached data for # /simple/, because authors generally expecting that # twine upload && pip install will function, but if # they've done a pip install in the last ~10 minutes # it won't. Thus by setting this to zero we will not # blindly use any cached data, however the benefit of # using max-age=0 instead of no-cache, is that we will # still support conditional requests, so we will still # minimize traffic sent in cases where the page hasn't # changed at all, we will just always incur the round # trip for the conditional GET now instead of only # once per 10 minutes. # For more information, please see pypa/pip#5670. "Cache-Control": "max-age=0", }, ) resp.raise_for_status() # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every # url we cannot know ahead of time for sure if something is HTML # or not. However we can check after we've downloaded it. content_type = resp.headers.get('Content-Type', 'unknown') if not content_type.lower().startswith("text/html"): logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return inst = HTMLPage(resp.content, resp.url, resp.headers) except requests.HTTPError as exc: _handle_get_page_fail(link, exc, url) except SSLError as exc: reason = "There was a problem confirming the ssl certificate: " reason += str(exc) _handle_get_page_fail(link, reason, url, meth=logger.info) except requests.ConnectionError as exc: _handle_get_page_fail(link, "connection error: %s" % exc, url) except requests.Timeout: _handle_get_page_fail(link, "timed out", url) else: return inst
def url_to_path(self, path):
    return urllib_parse.urljoin(self.url, path)
        logger.warning(
            'Cannot look at %s URL %s because it does not support '
            'lookup as web pages.', vcs_scheme, link,
        )
        return None

    # Tack index.html onto file:// URLs that point to directories
    scheme, _, path, _, _, _ = urllib_parse.urlparse(url)
    if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))):
        # add trailing slash if not present so urljoin doesn't trim
        # final segment
        if not url.endswith('/'):
            url += '/'
        url = urllib_parse.urljoin(url, 'index.html')
        logger.debug(' file: URL is directory, getting %s', url)

    try:
        resp = _get_html_response(url, session=session)
    except _NotHTTP:
        logger.warning(
            'Skipping page %s because it looks like an archive, and cannot '
            'be checked by a HTTP HEAD request.', link,
        )
    except _NotHTML as exc:
        logger.warning(
            'Skipping page %s because the %s request got Content-Type: %s.'
            'The only supported Content-Type is text/html',
            link, exc.request_desc, exc.content_type,
def get_page(cls, link, skip_archives=True, session=None): if session is None: raise TypeError( "get_page() missing 1 required keyword argument: 'session'" ) url = link.url url = url.split('#', 1)[0] # Check for VCS schemes that do not support lookup as web pages. from pip._internal.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %s URL %s', scheme, link) return None try: if skip_archives: filename = link.filename for bad_ext in ARCHIVE_EXTENSIONS: if filename.endswith(bad_ext): content_type = cls._get_content_type( url, session=session, ) if content_type.lower().startswith('text/html'): break else: logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return logger.debug('Getting page %s', url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = \ urllib_parse.urlparse(url) if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith('/'): url += '/' url = urllib_parse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s', url) resp = session.get( url, headers={ "Accept": "text/html", "Cache-Control": "max-age=600", }, ) resp.raise_for_status() # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every # url we cannot know ahead of time for sure if something is HTML # or not. However we can check after we've downloaded it. content_type = resp.headers.get('Content-Type', 'unknown') if not content_type.lower().startswith("text/html"): logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return inst = cls(resp.content, resp.url, resp.headers) except requests.HTTPError as exc: cls._handle_fail(link, exc, url) except SSLError as exc: reason = "There was a problem confirming the ssl certificate: " reason += str(exc) cls._handle_fail(link, reason, url, meth=logger.info) except requests.ConnectionError as exc: cls._handle_fail(link, "connection error: %s" % exc, url) except requests.Timeout: cls._handle_fail(link, "timed out", url) else: return inst
def process_line(line, filename, line_number, finder=None, comes_from=None, options=None, session=None, wheel_cache=None, constraint=False): """Process a single requirements line; This can result in creating/yielding requirements, or updating the finder. For lines that contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS_REQ, and they are scoped to the requirement. Other options from SUPPORTED_OPTIONS may be present, but are ignored. For lines that do not contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may be present, but are ignored. These lines may contain multiple options (although our docs imply only one is supported), and all our parsed and affect the finder. :param constraint: If True, parsing a constraints file. """ parser = build_parser() defaults = parser.get_default_values() defaults.index_url = None if finder: # `finder.format_control` will be updated during parsing defaults.format_control = finder.format_control args_str, options_str = break_args_options(line) opts, _ = parser.parse_args(shlex.split(options_str), defaults) # preserve for the nested code path line_comes_from = '%s %s (line %s)' % ( '-c' if constraint else '-r', filename, line_number) # yield a line requirement if args_str: isolated = options.isolated_mode if options else False if options: cmdoptions.check_install_build_global(options, opts) # get the options that apply to requirements req_options = {} for dest in SUPPORTED_OPTIONS_REQ_DEST: if dest in opts.__dict__ and opts.__dict__[dest]: req_options[dest] = opts.__dict__[dest] yield InstallRequirement.from_line( args_str, line_comes_from, constraint=constraint, isolated=isolated, options=req_options, wheel_cache=wheel_cache ) # yield an editable requirement elif opts.editables: isolated = options.isolated_mode if options else False default_vcs = options.default_vcs if options else None yield InstallRequirement.from_editable( opts.editables[0], comes_from=line_comes_from, constraint=constraint, default_vcs=default_vcs, isolated=isolated, wheel_cache=wheel_cache ) # parse a nested requirements file elif opts.requirements or opts.constraints: if opts.requirements: req_path = opts.requirements[0] nested_constraint = False else: req_path = opts.constraints[0] nested_constraint = True # original file is over http if SCHEME_RE.search(filename): # do a url join so relative paths work req_path = urllib_parse.urljoin(filename, req_path) # original file and nested file are paths elif not SCHEME_RE.search(req_path): # do a join so relative paths work req_dir = os.path.dirname(filename) req_path = os.path.join(os.path.dirname(filename), req_path) # TODO: Why not use `comes_from='-r {} (line {})'` here as well? 
parser = parse_requirements( req_path, finder, comes_from, options, session, constraint=nested_constraint, wheel_cache=wheel_cache ) for req in parser: yield req # set finder options elif finder: if opts.index_url: finder.index_urls = [opts.index_url] if opts.use_wheel is False: finder.use_wheel = False pip.index.fmt_ctl_no_use_wheel(finder.format_control) if opts.no_index is True: finder.index_urls = [] if opts.allow_all_external: finder.allow_all_external = opts.allow_all_external if opts.extra_index_urls: finder.index_urls.extend(opts.extra_index_urls) if opts.allow_external: finder.allow_external |= set( [normalize_name(v).lower() for v in opts.allow_external]) if opts.allow_unverified: # Remove after 7.0 finder.allow_unverified |= set( [normalize_name(v).lower() for v in opts.allow_unverified]) if opts.find_links: # FIXME: it would be nice to keep track of the source # of the find_links: support a find-links local path # relative to a requirements file. value = opts.find_links[0] req_dir = os.path.dirname(os.path.abspath(filename)) relative_to_reqs_file = os.path.join(req_dir, value) if os.path.exists(relative_to_reqs_file): value = relative_to_reqs_file finder.find_links.append(value)
def get_page(cls, link, skip_archives=True, session=None): if session is None: raise TypeError("get_page() missing 1 required keyword argument: 'session'") url = link.url url = url.split("#", 1)[0] # Check for VCS schemes that do not support lookup as web pages. from pip.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in "+:": logger.debug("Cannot look at %s URL %s", scheme, link) return None try: if skip_archives: filename = link.filename for bad_ext in [".tar", ".tar.gz", ".tar.bz2", ".tgz", ".zip"]: if filename.endswith(bad_ext): content_type = cls._get_content_type(url, session=session) if content_type.lower().startswith("text/html"): break else: logger.debug("Skipping page %s because of Content-Type: %s", link, content_type) return logger.debug("Getting page %s", url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = urllib_parse.urlparse(url) if scheme == "file" and os.path.isdir(urllib_request.url2pathname(path)): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith("/"): url += "/" url = urllib_parse.urljoin(url, "index.html") logger.debug(" file: URL is directory, getting %s", url) resp = session.get(url, headers={"Accept": "text/html", "Cache-Control": "max-age=600"}) resp.raise_for_status() # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every # url we cannot know ahead of time for sure if something is HTML # or not. However we can check after we've downloaded it. content_type = resp.headers.get("Content-Type", "unknown") if not content_type.lower().startswith("text/html"): logger.debug("Skipping page %s because of Content-Type: %s", link, content_type) return inst = cls(resp.content, resp.url, resp.headers, trusted=link.trusted) except requests.HTTPError as exc: level = 2 if exc.response.status_code == 404 else 1 cls._handle_fail(link, exc, url, level=level) except requests.ConnectionError as exc: cls._handle_fail(link, "connection error: %s" % exc, url) except requests.Timeout: cls._handle_fail(link, "timed out", url) except SSLError as exc: reason = "There was a problem confirming the ssl certificate: " "%s" % exc cls._handle_fail(link, reason, url, level=2, meth=logger.info) else: return inst
def process_line( line, # type: Text filename, # type: str line_number, # type: int finder=None, # type: Optional[PackageFinder] comes_from=None, # type: Optional[str] options=None, # type: Optional[optparse.Values] session=None, # type: Optional[PipSession] wheel_cache=None, # type: Optional[WheelCache] use_pep517=None, # type: Optional[bool] constraint=False # type: bool ): # type: (...) -> Iterator[InstallRequirement] """Process a single requirements line; This can result in creating/yielding requirements, or updating the finder. For lines that contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS_REQ, and they are scoped to the requirement. Other options from SUPPORTED_OPTIONS may be present, but are ignored. For lines that do not contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may be present, but are ignored. These lines may contain multiple options (although our docs imply only one is supported), and all our parsed and affect the finder. :param constraint: If True, parsing a constraints file. :param options: OptionParser options that we may update """ parser = build_parser(line) defaults = parser.get_default_values() defaults.index_url = None if finder: defaults.format_control = finder.format_control args_str, options_str = break_args_options(line) # Prior to 2.7.3, shlex cannot deal with unicode entries if sys.version_info < (2, 7, 3): # https://github.com/python/mypy/issues/1174 options_str = options_str.encode('utf8') # type: ignore # https://github.com/python/mypy/issues/1174 opts, _ = parser.parse_args( shlex.split(options_str), defaults) # type: ignore # preserve for the nested code path line_comes_from = '%s %s (line %s)' % ( '-c' if constraint else '-r', filename, line_number, ) # yield a line requirement if args_str: isolated = options.isolated_mode if options else False if options: cmdoptions.check_install_build_global(options, opts) # get the options that apply to requirements req_options = {} for dest in SUPPORTED_OPTIONS_REQ_DEST: if dest in opts.__dict__ and opts.__dict__[dest]: req_options[dest] = opts.__dict__[dest] yield install_req_from_line( args_str, line_comes_from, constraint=constraint, use_pep517=use_pep517, isolated=isolated, options=req_options, wheel_cache=wheel_cache ) # yield an editable requirement elif opts.editables: isolated = options.isolated_mode if options else False yield install_req_from_editable( opts.editables[0], comes_from=line_comes_from, use_pep517=use_pep517, constraint=constraint, isolated=isolated, wheel_cache=wheel_cache ) # parse a nested requirements file elif opts.requirements or opts.constraints: if opts.requirements: req_path = opts.requirements[0] nested_constraint = False else: req_path = opts.constraints[0] nested_constraint = True # original file is over http if SCHEME_RE.search(filename): # do a url join so relative paths work req_path = urllib_parse.urljoin(filename, req_path) # original file and nested file are paths elif not SCHEME_RE.search(req_path): # do a join so relative paths work req_path = os.path.join(os.path.dirname(filename), req_path) # TODO: Why not use `comes_from='-r {} (line {})'` here as well? 
parsed_reqs = parse_requirements( req_path, finder, comes_from, options, session, constraint=nested_constraint, wheel_cache=wheel_cache ) for req in parsed_reqs: yield req # percolate hash-checking option upward elif opts.require_hashes: options.require_hashes = opts.require_hashes # set finder options elif finder: if opts.index_url: finder.index_urls = [opts.index_url] if opts.no_index is True: finder.index_urls = [] if opts.extra_index_urls: finder.index_urls.extend(opts.extra_index_urls) if opts.find_links: # FIXME: it would be nice to keep track of the source # of the find_links: support a find-links local path # relative to a requirements file. value = opts.find_links[0] req_dir = os.path.dirname(os.path.abspath(filename)) relative_to_reqs_file = os.path.join(req_dir, value) if os.path.exists(relative_to_reqs_file): value = relative_to_reqs_file finder.find_links.append(value) if opts.pre: finder.allow_all_prereleases = True if opts.trusted_hosts: finder.secure_origins.extend( ("*", host, "*") for host in opts.trusted_hosts)
def _get_html_page(link, session=None): # type: (Link, Optional[PipSession]) -> Optional[HTMLPage] if session is None: raise TypeError( "_get_html_page() missing 1 required keyword argument: 'session'") url = link.url.split("#", 1)[0] # Check for VCS schemes that do not support lookup as web pages. vcs_scheme = _match_vcs_scheme(url) if vcs_scheme: logger.warning( "Cannot look at %s URL %s because it does not support " "lookup as web pages.", vcs_scheme, link, ) return None # Tack index.html onto file:// URLs that point to directories scheme, _, path, _, _, _ = urllib_parse.urlparse(url) if scheme == "file" and os.path.isdir(urllib_request.url2pathname(path)): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith("/"): url += "/" url = urllib_parse.urljoin(url, "index.html") logger.debug(" file: URL is directory, getting %s", url) try: resp = _get_html_response(url, session=session) except _NotHTTP: logger.warning( "Skipping page %s because it looks like an archive, and cannot " "be checked by a HTTP HEAD request.", link, ) except _NotHTML as exc: logger.warning( "Skipping page %s because the %s request got Content-Type: %s." "The only supported Content-Type is text/html", link, exc.request_desc, exc.content_type, ) except NetworkConnectionError as exc: _handle_get_page_fail(link, exc) except RetryError as exc: _handle_get_page_fail(link, exc) except SSLError as exc: reason = "There was a problem confirming the ssl certificate: " reason += str(exc) _handle_get_page_fail(link, reason, meth=logger.info) except requests.ConnectionError as exc: _handle_get_page_fail(link, "connection error: {}".format(exc)) except requests.Timeout: _handle_get_page_fail(link, "timed out") else: return _make_html_page(resp, cache_link_parsing=link.cache_link_parsing) return None
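_handle_get_page_fail() is not part of this excerpt. Judging from how it is called above (a link, a reason, and an optional meth= logging callable used for the SSL case), a plausible reading is a small logging helper along these lines; the default log level here is an assumption.

import logging

logger = logging.getLogger(__name__)

def _handle_get_page_fail(link, reason, meth=None):
    if meth is None:
        meth = logger.debug  # assumed default level
    meth("Could not fetch URL %s: %s - skipping", link, reason)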
def parse_requirements(filename, finder=None, comes_from=None, options=None, session=None): if session is None: raise TypeError( "parse_requirements() missing 1 required keyword argument: " "'session'") skip_match = None skip_regex = options.skip_requirements_regex if options else None if skip_regex: skip_match = re.compile(skip_regex) reqs_file_dir = os.path.dirname(os.path.abspath(filename)) filename, content = get_file_content( filename, comes_from=comes_from, session=session, ) for line_number, line in enumerate(content.splitlines(), 1): line = line.strip() # Remove comments from file and all spaces before it line = re.sub(r"(^|\s)+#.*$", "", line) if not line: continue if skip_match and skip_match.search(line): continue if line.startswith(('-r', '--requirement')): req_url = _remove_prefixes(line, '-r', '--requirement') if _scheme_re.search(filename): # Relative to a URL req_url = urllib_parse.urljoin(filename, req_url) elif not _scheme_re.search(req_url): req_url = os.path.join(os.path.dirname(filename), req_url) for item in parse_requirements(req_url, finder, comes_from=filename, options=options, session=session): yield item elif line.startswith(('-Z', '--always-unzip')): # No longer used, but previously these were used in # requirement files, so we'll ignore. pass elif line.startswith(('-f', '--find-links')): find_links = _remove_prefixes(line, '-f', '--find-links') # FIXME: it would be nice to keep track of the source of # the find_links: # support a find-links local path relative to a requirements file relative_to_reqs_file = os.path.join(reqs_file_dir, find_links) if os.path.exists(relative_to_reqs_file): find_links = relative_to_reqs_file if finder: finder.find_links.append(find_links) elif line.startswith(('-i', '--index-url')): index_url = _remove_prefixes(line, '-i', '--index-url') if finder: finder.index_urls = [index_url] elif line.startswith('--extra-index-url'): line = _remove_prefix(line, '--extra-index-url') if finder: finder.index_urls.append(line) elif line.startswith('--use-wheel'): # Default in 1.5 pass elif line.startswith('--no-use-wheel'): if finder: finder.use_wheel = False elif line.startswith('--no-index'): if finder: finder.index_urls = [] elif line.startswith("--allow-external"): line = _remove_prefix(line, '--allow-external') if finder: finder.allow_external |= set([normalize_name(line).lower()]) elif line.startswith("--allow-all-external"): if finder: finder.allow_all_external = True # Remove in 7.0 elif line.startswith("--no-allow-external"): pass # Remove in 7.0 elif line.startswith("--no-allow-insecure"): pass # Remove after 7.0 elif line.startswith("--allow-insecure"): line = _remove_prefix(line, '--allow-insecure') if finder: finder.allow_unverified |= set([normalize_name(line).lower()]) elif line.startswith("--allow-unverified"): line = _remove_prefix(line, '--allow-unverified') if finder: finder.allow_unverified |= set([normalize_name(line).lower()]) else: comes_from = '-r %s (line %s)' % (filename, line_number) if line.startswith(('-e', '--editable')): editable = _remove_prefixes(line, '-e', '--editable') req = InstallRequirement.from_editable( editable, comes_from=comes_from, default_vcs=options.default_vcs if options else None, isolated=options.isolated_mode if options else False, ) else: req = InstallRequirement.from_line( line, comes_from, isolated=options.isolated_mode if options else False, ) yield req
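_remove_prefix() and _remove_prefixes() are not shown in this snippet; the helpers below are an assumed reading of them, stripping a leading option name plus any '=' or whitespace separator from the line.

def _remove_prefix(line, prefix):
    return line[len(prefix):].lstrip(' =')

def _remove_prefixes(line, short_opt, long_opt):
    for prefix in (long_opt, short_opt):
        if line.startswith(prefix):
            return _remove_prefix(line, prefix)
    return line

print(_remove_prefixes('-r requirements/dev.txt', '-r', '--requirement'))
print(_remove_prefixes('--requirement=requirements/dev.txt', '-r', '--requirement'))
# both print: requirements/dev.txt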
def process_line( line, # type: Text filename, # type: str line_number, # type: int finder=None, # type: Optional[PackageFinder] comes_from=None, # type: Optional[str] options=None, # type: Optional[optparse.Values] session=None, # type: Optional[PipSession] wheel_cache=None, # type: Optional[WheelCache] use_pep517=None, # type: Optional[bool] constraint=False, # type: bool ): # type: (...) -> Iterator[InstallRequirement] """Process a single requirements line; This can result in creating/yielding requirements, or updating the finder. For lines that contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS_REQ, and they are scoped to the requirement. Other options from SUPPORTED_OPTIONS may be present, but are ignored. For lines that do not contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may be present, but are ignored. These lines may contain multiple options (although our docs imply only one is supported), and all our parsed and affect the finder. :param constraint: If True, parsing a line_constraints file. :param options: OptionParser options that we may update """ parser = build_parser(line) defaults = parser.get_default_values() defaults.index_url = None if finder: defaults.format_control = finder.format_control args_str, options_str = break_args_options(line) # Prior to 2.7.3, shlex cannot deal with unicode entries if sys.version_info < (2, 7, 3): # https://github.com/python/mypy/issues/1174 options_str = options_str.encode('utf8') # type: ignore # https://github.com/python/mypy/issues/1174 opts, _ = parser.parse_args(shlex.split(options_str), defaults) # type: ignore # preserve for the nested code path line_comes_from = '%s %s (line %s)' % ( '-c' if constraint else '-r', filename, line_number, ) # yield a line requirement if args_str: isolated = options.isolated_mode if options else False if options: cmdoptions.check_install_build_global(options, opts) # get the options that apply to requirements req_options = {} for dest in SUPPORTED_OPTIONS_REQ_DEST: if dest in opts.__dict__ and opts.__dict__[dest]: req_options[dest] = opts.__dict__[dest] line_source = 'line {} of {}'.format(line_number, filename) yield install_req_from_line( args_str, comes_from=line_comes_from, use_pep517=use_pep517, isolated=isolated, options=req_options, wheel_cache=wheel_cache, constraint=constraint, line_source=line_source, ) # yield an editable requirement elif opts.editables: isolated = options.isolated_mode if options else False yield install_req_from_editable(opts.editables[0], comes_from=line_comes_from, use_pep517=use_pep517, constraint=constraint, isolated=isolated, wheel_cache=wheel_cache) # parse a nested requirements file elif opts.requirements or opts.constraints: if opts.requirements: req_path = opts.requirements[0] nested_constraint = False else: req_path = opts.constraints[0] nested_constraint = True # original file is over http if SCHEME_RE.search(filename): # do a url join so relative paths work req_path = urllib_parse.urljoin(filename, req_path) # original file and nested file are paths elif not SCHEME_RE.search(req_path): # do a join so relative paths work req_path = os.path.join(os.path.dirname(filename), req_path) # TODO: Why not use `comes_from='-r {} (line {})'` here as well? 
parsed_reqs = parse_requirements(req_path, finder, comes_from, options, session, constraint=nested_constraint, wheel_cache=wheel_cache) for req in parsed_reqs: yield req # percolate hash-checking option upward elif opts.require_hashes: options.require_hashes = opts.require_hashes # set finder options elif finder: find_links = finder.find_links index_urls = finder.index_urls if opts.index_url: index_urls = [opts.index_url] if opts.no_index is True: index_urls = [] if opts.extra_index_urls: index_urls.extend(opts.extra_index_urls) if opts.find_links: # FIXME: it would be nice to keep track of the source # of the find_links: support a find-links local path # relative to a requirements file. value = opts.find_links[0] req_dir = os.path.dirname(os.path.abspath(filename)) relative_to_reqs_file = os.path.join(req_dir, value) if os.path.exists(relative_to_reqs_file): value = relative_to_reqs_file find_links.append(value) search_scope = SearchScope( find_links=find_links, index_urls=index_urls, ) finder.search_scope = search_scope if opts.pre: finder.set_allow_all_prereleases() for host in opts.trusted_hosts or []: source = 'line {} of {}'.format(line_number, filename) finder.add_trusted_host(host, source=source)
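SearchScope here bundles the accumulated find-links and index URLs into one object before handing it back to the finder; a minimal stand-in with the same two fields is sketched below (pip's real class also carries helper methods, so this is only a structural illustration).

from collections import namedtuple

SearchScope = namedtuple("SearchScope", ["find_links", "index_urls"])

scope = SearchScope(find_links=["./wheels"], index_urls=["https://pypi.org/simple"])
print(scope.index_urls)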
def _url_for_path(self, path):
    return urllib_parse.urljoin(self.url, path)
def process_line(line, filename, line_number, finder=None, comes_from=None, options=None, session=None, wheel_cache=None, constraint=False): """Process a single requirements line; This can result in creating/yielding requirements, or updating the finder. For lines that contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS_REQ, and they are scoped to the requirement. Other options from SUPPORTED_OPTIONS may be present, but are ignored. For lines that do not contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may be present, but are ignored. These lines may contain multiple options (although our docs imply only one is supported), and all our parsed and affect the finder. :param constraint: If True, parsing a constraints file. """ parser = build_parser() defaults = parser.get_default_values() defaults.index_url = None if finder: # `finder.format_control` will be updated during parsing defaults.format_control = finder.format_control args_str, options_str = break_args_options(line) opts, _ = parser.parse_args(shlex.split(options_str), defaults) # preserve for the nested code path line_comes_from = '%s %s (line %s)' % ('-c' if constraint else '-r', filename, line_number) # yield a line requirement if args_str: isolated = options.isolated_mode if options else False if options: cmdoptions.check_install_build_global(options, opts) # get the options that apply to requirements req_options = {} for dest in SUPPORTED_OPTIONS_REQ_DEST: if dest in opts.__dict__ and opts.__dict__[dest]: req_options[dest] = opts.__dict__[dest] yield InstallRequirement.from_line(args_str, line_comes_from, constraint=constraint, isolated=isolated, options=req_options, wheel_cache=wheel_cache) # yield an editable requirement elif opts.editables: isolated = options.isolated_mode if options else False default_vcs = options.default_vcs if options else None yield InstallRequirement.from_editable(opts.editables[0], comes_from=line_comes_from, constraint=constraint, default_vcs=default_vcs, isolated=isolated, wheel_cache=wheel_cache) # parse a nested requirements file elif opts.requirements or opts.constraints: if opts.requirements: req_path = opts.requirements[0] nested_constraint = False else: req_path = opts.constraints[0] nested_constraint = True # original file is over http if SCHEME_RE.search(filename): # do a url join so relative paths work req_path = urllib_parse.urljoin(filename, req_path) # original file and nested file are paths elif not SCHEME_RE.search(req_path): # do a join so relative paths work req_dir = os.path.dirname(filename) req_path = os.path.join(os.path.dirname(filename), req_path) # TODO: Why not use `comes_from='-r {} (line {})'` here as well? 
parser = parse_requirements(req_path, finder, comes_from, options, session, constraint=nested_constraint, wheel_cache=wheel_cache) for req in parser: yield req # set finder options elif finder: if opts.index_url: finder.index_urls = [opts.index_url] if opts.use_wheel is False: finder.use_wheel = False pip.index.fmt_ctl_no_use_wheel(finder.format_control) if opts.no_index is True: finder.index_urls = [] if opts.allow_all_external: finder.allow_all_external = opts.allow_all_external if opts.extra_index_urls: finder.index_urls.extend(opts.extra_index_urls) if opts.allow_external: finder.allow_external |= set( [normalize_name(v).lower() for v in opts.allow_external]) if opts.allow_unverified: # Remove after 7.0 finder.allow_unverified |= set( [normalize_name(v).lower() for v in opts.allow_unverified]) if opts.find_links: # FIXME: it would be nice to keep track of the source # of the find_links: support a find-links local path # relative to a requirements file. value = opts.find_links[0] req_dir = os.path.dirname(os.path.abspath(filename)) relative_to_reqs_file = os.path.join(req_dir, value) if os.path.exists(relative_to_reqs_file): value = relative_to_reqs_file finder.find_links.append(value)
def process_line(line, filename, line_number, finder=None, comes_from=None, options=None, session=None, wheel_cache=None, constraint=False): """Process a single requirements line; This can result in creating/yielding requirements, or updating the finder. For lines that contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS_REQ, and they are scoped to the requirement. Other options from SUPPORTED_OPTIONS may be present, but are ignored. For lines that do not contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may be present, but are ignored. These lines may contain multiple options (although our docs imply only one is supported), and all our parsed and affect the finder. :param constraint: If True, parsing a constraints file. :param options: OptionParser options that we may update """ parser = build_parser() defaults = parser.get_default_values() defaults.index_url = None if finder: # `finder.format_control` will be updated during parsing defaults.format_control = finder.format_control args_str, options_str = break_args_options(line) if sys.version_info < (2, 7, 3): # Prior to 2.7.3, shlex cannot deal with unicode entries options_str = options_str.encode('utf8') opts, _ = parser.parse_args(shlex.split(options_str), defaults) # preserve for the nested code path line_comes_from = '%s %s (line %s)' % ( '-c' if constraint else '-r', filename, line_number) # yield a line requirement if args_str: isolated = options.isolated_mode if options else False if options: cmdoptions.check_install_build_global(options, opts) # get the options that apply to requirements req_options = {} for dest in SUPPORTED_OPTIONS_REQ_DEST: if dest in opts.__dict__ and opts.__dict__[dest]: req_options[dest] = opts.__dict__[dest] yield InstallRequirement.from_line( args_str, line_comes_from, constraint=constraint, isolated=isolated, options=req_options, wheel_cache=wheel_cache ) # yield an editable requirement elif opts.editables: isolated = options.isolated_mode if options else False default_vcs = options.default_vcs if options else None yield InstallRequirement.from_editable( opts.editables[0], comes_from=line_comes_from, constraint=constraint, default_vcs=default_vcs, isolated=isolated, wheel_cache=wheel_cache ) # parse a nested requirements file elif opts.requirements or opts.constraints: if opts.requirements: req_path = opts.requirements[0] nested_constraint = False else: req_path = opts.constraints[0] nested_constraint = True # original file is over http if SCHEME_RE.search(filename): # do a url join so relative paths work req_path = urllib_parse.urljoin(filename, req_path) # original file and nested file are paths elif not SCHEME_RE.search(req_path): # do a join so relative paths work req_path = os.path.join(os.path.dirname(filename), req_path) # TODO: Why not use `comes_from='-r {} (line {})'` here as well? parser = parse_requirements( req_path, finder, comes_from, options, session, constraint=nested_constraint, wheel_cache=wheel_cache ) for req in parser: yield req # percolate hash-checking option upward elif opts.require_hashes: options.require_hashes = opts.require_hashes # set finder options elif finder: if opts.allow_external: warnings.warn( "--allow-external has been deprecated and will be removed in " "the future. 
Due to changes in the repository protocol, it no " "longer has any effect.", RemovedInPip10Warning, ) if opts.allow_all_external: warnings.warn( "--allow-all-external has been deprecated and will be removed " "in the future. Due to changes in the repository protocol, it " "no longer has any effect.", RemovedInPip10Warning, ) if opts.allow_unverified: warnings.warn( "--allow-unverified has been deprecated and will be removed " "in the future. Due to changes in the repository protocol, it " "no longer has any effect.", RemovedInPip10Warning, ) if opts.index_url: finder.index_urls = [opts.index_url] if opts.use_wheel is False: finder.use_wheel = False pip.index.fmt_ctl_no_use_wheel(finder.format_control) if opts.no_index is True: finder.index_urls = [] if opts.extra_index_urls: finder.index_urls.extend(opts.extra_index_urls) if opts.find_links: # FIXME: it would be nice to keep track of the source # of the find_links: support a find-links local path # relative to a requirements file. value = opts.find_links[0] req_dir = os.path.dirname(os.path.abspath(filename)) relative_to_reqs_file = os.path.join(req_dir, value) if os.path.exists(relative_to_reqs_file): value = relative_to_reqs_file finder.find_links.append(value) if opts.pre: finder.allow_all_prereleases = True if opts.process_dependency_links: finder.process_dependency_links = True if opts.trusted_hosts: finder.secure_origins.extend( ("*", host, "*") for host in opts.trusted_hosts)
def parse_requirements(filename, finder=None, comes_from=None, options=None, session=None): if session is None: raise TypeError( "parse_requirements() missing 1 required keyword argument: " "'session'" ) skip_match = None skip_regex = options.skip_requirements_regex if options else None if skip_regex: skip_match = re.compile(skip_regex) reqs_file_dir = os.path.dirname(os.path.abspath(filename)) filename, content = get_file_content( filename, comes_from=comes_from, session=session, ) for line_number, line in enumerate(content.splitlines(), 1): line = line.strip() # Remove comments from file and all spaces before it line = re.sub(r"(^|\s)+#.*$", "", line) if not line: continue if skip_match and skip_match.search(line): continue if line.startswith(('-r', '--requirement')): req_url = _remove_prefixes(line, '-r', '--requirement') if _scheme_re.search(filename): # Relative to a URL req_url = urllib_parse.urljoin(filename, req_url) elif not _scheme_re.search(req_url): req_url = os.path.join(os.path.dirname(filename), req_url) for item in parse_requirements( req_url, finder, comes_from=filename, options=options, session=session): yield item elif line.startswith(('-Z', '--always-unzip')): # No longer used, but previously these were used in # requirement files, so we'll ignore. pass elif line.startswith(('-f', '--find-links')): find_links = _remove_prefixes(line, '-f', '--find-links') # FIXME: it would be nice to keep track of the source of # the find_links: # support a find-links local path relative to a requirements file relative_to_reqs_file = os.path.join(reqs_file_dir, find_links) if os.path.exists(relative_to_reqs_file): find_links = relative_to_reqs_file if finder: finder.find_links.append(find_links) elif line.startswith(('-i', '--index-url')): index_url = _remove_prefixes(line, '-i', '--index-url') if finder: finder.index_urls = [index_url] elif line.startswith('--extra-index-url'): line = _remove_prefix(line, '--extra-index-url') if finder: finder.index_urls.append(line) elif line.startswith('--use-wheel'): # Default in 1.5 pass elif line.startswith('--no-use-wheel'): if finder: finder.use_wheel = False elif line.startswith('--no-index'): if finder: finder.index_urls = [] elif line.startswith("--allow-external"): line = _remove_prefix(line, '--allow-external') if finder: finder.allow_external |= set([normalize_name(line).lower()]) elif line.startswith("--allow-all-external"): if finder: finder.allow_all_external = True # Remove in 7.0 elif line.startswith("--no-allow-external"): pass # Remove in 7.0 elif line.startswith("--no-allow-insecure"): pass # Remove after 7.0 elif line.startswith("--allow-insecure"): line = _remove_prefix(line, '--allow-insecure') if finder: finder.allow_unverified |= set([normalize_name(line).lower()]) elif line.startswith("--allow-unverified"): line = _remove_prefix(line, '--allow-unverified') if finder: finder.allow_unverified |= set([normalize_name(line).lower()]) else: comes_from = '-r %s (line %s)' % (filename, line_number) if line.startswith(('-e', '--editable')): editable = _remove_prefixes(line, '-e', '--editable') req = InstallRequirement.from_editable( editable, comes_from=comes_from, default_vcs=options.default_vcs if options else None, isolated=options.isolated_mode if options else False, ) else: req = InstallRequirement.from_line( line, comes_from, isolated=options.isolated_mode if options else False, ) yield req
# Remove comments from file
line = re.sub(r"(^|\s)#.*$", "", line)
if not line or line.startswith('#'):
    continue
if skip_match and skip_match.search(line):
    continue
if line.startswith('-r') or line.startswith('--requirement'):
    if line.startswith('-r'):
        req_url = line[2:].strip()
    else:
        req_url = line[len('--requirement'):].strip().strip('=')
    if _scheme_re.search(filename):
        # Relative to a URL
        req_url = urllib_parse.urljoin(filename, req_url)
    elif not _scheme_re.search(req_url):
        req_url = os.path.join(os.path.dirname(filename), req_url)
    for item in parse_requirements(
            req_url, finder,
            comes_from=filename,
            options=options,
            session=session):
        yield item
elif line.startswith(('-Z', '--always-unzip')):
    # No longer used, but previously these were used in
    # requirement files, so we'll ignore.
    pass
elif line.startswith(('-f', '--find-links')):
    find_links = _remove_prefixes(line, '-f', '--find-links')
def process_line( line, # type: Text filename, # type: str line_number, # type: int finder=None, # type: Optional[PackageFinder] comes_from=None, # type: Optional[str] options=None, # type: Optional[optparse.Values] session=None, # type: Optional[PipSession] wheel_cache=None, # type: Optional[WheelCache] use_pep517=None, # type: Optional[bool] constraint=False, # type: bool ): # type: (...) -> Iterator[InstallRequirement] """Process a single requirements line; This can result in creating/yielding requirements, or updating the finder. For lines that contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS_REQ, and they are scoped to the requirement. Other options from SUPPORTED_OPTIONS may be present, but are ignored. For lines that do not contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may be present, but are ignored. These lines may contain multiple options (although our docs imply only one is supported), and all our parsed and affect the finder. :param constraint: If True, parsing a constraints file. :param options: OptionParser options that we may update """ line_parser = get_line_parser(finder) try: args_str, opts = line_parser(line) except OptionParsingError as e: # add offending line msg = 'Invalid requirement: %s\n%s' % (line, e.msg) raise RequirementsFileParseError(msg) # parse a nested requirements file if ( not args_str and not opts.editables and (opts.requirements or opts.constraints) ): if opts.requirements: req_path = opts.requirements[0] nested_constraint = False else: req_path = opts.constraints[0] nested_constraint = True # original file is over http if SCHEME_RE.search(filename): # do a url join so relative paths work req_path = urllib_parse.urljoin(filename, req_path) # original file and nested file are paths elif not SCHEME_RE.search(req_path): # do a join so relative paths work req_path = os.path.join(os.path.dirname(filename), req_path) parsed_reqs = parse_requirements( req_path, finder, comes_from, options, session, constraint=nested_constraint, wheel_cache=wheel_cache ) for req in parsed_reqs: yield req return # preserve for the nested code path line_comes_from = '%s %s (line %s)' % ( '-c' if constraint else '-r', filename, line_number, ) # yield a line requirement if args_str: isolated = options.isolated_mode if options else False if options: cmdoptions.check_install_build_global(options, opts) # get the options that apply to requirements req_options = {} for dest in SUPPORTED_OPTIONS_REQ_DEST: if dest in opts.__dict__ and opts.__dict__[dest]: req_options[dest] = opts.__dict__[dest] line_source = 'line {} of {}'.format(line_number, filename) yield install_req_from_line( args_str, comes_from=line_comes_from, use_pep517=use_pep517, isolated=isolated, options=req_options, wheel_cache=wheel_cache, constraint=constraint, line_source=line_source, ) # yield an editable requirement elif opts.editables: isolated = options.isolated_mode if options else False yield install_req_from_editable( opts.editables[0], comes_from=line_comes_from, use_pep517=use_pep517, constraint=constraint, isolated=isolated, wheel_cache=wheel_cache ) # percolate hash-checking option upward elif opts.require_hashes: options.require_hashes = opts.require_hashes # set finder options elif finder: find_links = finder.find_links index_urls = finder.index_urls if opts.index_url: index_urls = [opts.index_url] if opts.no_index is True: index_urls = [] if opts.extra_index_urls: 
index_urls.extend(opts.extra_index_urls) if opts.find_links: # FIXME: it would be nice to keep track of the source # of the find_links: support a find-links local path # relative to a requirements file. value = opts.find_links[0] req_dir = os.path.dirname(os.path.abspath(filename)) relative_to_reqs_file = os.path.join(req_dir, value) if os.path.exists(relative_to_reqs_file): value = relative_to_reqs_file find_links.append(value) search_scope = SearchScope( find_links=find_links, index_urls=index_urls, ) finder.search_scope = search_scope if opts.pre: finder.set_allow_all_prereleases() for host in opts.trusted_hosts or []: source = 'line {} of {}'.format(line_number, filename) session.add_trusted_host(host, source=source)