def path_to_url(path): """ Convert a path to a file: URL. The path will be made absolute and have quoted path parts. """ path = os.path.normpath(os.path.abspath(path)) url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path)) return url
def links(self): """Yields all links in the page""" for anchor in self.parsed.findall(".//a"): if anchor.get("href"): href = anchor.get("href") url = self.clean_link(urllib_parse.urljoin( self.base_url, href)) pyrequire = anchor.get('data-requires-python') pyrequire = unescape(pyrequire) if pyrequire else None yield Link(url, self, requires_python=pyrequire)
def get_page(cls, link, skip_archives=True, session=None): if session is None: raise TypeError( "get_page() missing 1 required keyword argument: 'session'" ) url = link.url url = url.split('#', 1)[0] # Check for VCS schemes that do not support lookup as web pages. from pip9.vcs import VcsSupport for scheme in VcsSupport.schemes: if url.lower().startswith(scheme) and url[len(scheme)] in '+:': logger.debug('Cannot look at %s URL %s', scheme, link) return None try: if skip_archives: filename = link.filename for bad_ext in ARCHIVE_EXTENSIONS: if filename.endswith(bad_ext): content_type = cls._get_content_type( url, session=session, ) if content_type.lower().startswith('text/html'): break else: logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return logger.debug('Getting page %s', url) # Tack index.html onto file:// URLs that point to directories (scheme, netloc, path, params, query, fragment) = \ urllib_parse.urlparse(url) if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))): # add trailing slash if not present so urljoin doesn't trim # final segment if not url.endswith('/'): url += '/' url = urllib_parse.urljoin(url, 'index.html') logger.debug(' file: URL is directory, getting %s', url) resp = session.get( url, headers={ "Accept": "text/html", "Cache-Control": "max-age=600", }, ) resp.raise_for_status() # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every # url we cannot know ahead of time for sure if something is HTML # or not. However we can check after we've downloaded it. content_type = resp.headers.get('Content-Type', 'unknown') if not content_type.lower().startswith("text/html"): logger.debug( 'Skipping page %s because of Content-Type: %s', link, content_type, ) return inst = cls(resp.content, resp.url, resp.headers) except requests.HTTPError as exc: cls._handle_fail(link, exc, url) except SSLError as exc: reason = ("There was a problem confirming the ssl certificate: " "%s" % exc) cls._handle_fail(link, reason, url, meth=logger.info) except requests.ConnectionError as exc: cls._handle_fail(link, "connection error: %s" % exc, url) except requests.Timeout: cls._handle_fail(link, "timed out", url) else: return inst
def url_to_path(self, path): return urllib_parse.urljoin(self.url, path)
def process_line(line, filename, line_number, finder=None, comes_from=None, options=None, session=None, wheel_cache=None, constraint=False): """Process a single requirements line; This can result in creating/yielding requirements, or updating the finder. For lines that contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS_REQ, and they are scoped to the requirement. Other options from SUPPORTED_OPTIONS may be present, but are ignored. For lines that do not contain requirements, the only options that have an effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may be present, but are ignored. These lines may contain multiple options (although our docs imply only one is supported), and all our parsed and affect the finder. :param constraint: If True, parsing a constraints file. :param options: OptionParser options that we may update """ parser = build_parser() defaults = parser.get_default_values() defaults.index_url = None if finder: # `finder.format_control` will be updated during parsing defaults.format_control = finder.format_control args_str, options_str = break_args_options(line) if sys.version_info < (2, 7, 3): # Prior to 2.7.3, shlex cannot deal with unicode entries options_str = options_str.encode('utf8') opts, _ = parser.parse_args(shlex.split(options_str), defaults) # preserve for the nested code path line_comes_from = '%s %s (line %s)' % ('-c' if constraint else '-r', filename, line_number) # yield a line requirement if args_str: isolated = options.isolated_mode if options else False if options: cmdoptions.check_install_build_global(options, opts) # get the options that apply to requirements req_options = {} for dest in SUPPORTED_OPTIONS_REQ_DEST: if dest in opts.__dict__ and opts.__dict__[dest]: req_options[dest] = opts.__dict__[dest] yield InstallRequirement.from_line(args_str, line_comes_from, constraint=constraint, isolated=isolated, options=req_options, wheel_cache=wheel_cache) # yield an editable requirement elif opts.editables: isolated = options.isolated_mode if options else False default_vcs = options.default_vcs if options else None yield InstallRequirement.from_editable(opts.editables[0], comes_from=line_comes_from, constraint=constraint, default_vcs=default_vcs, isolated=isolated, wheel_cache=wheel_cache) # parse a nested requirements file elif opts.requirements or opts.constraints: if opts.requirements: req_path = opts.requirements[0] nested_constraint = False else: req_path = opts.constraints[0] nested_constraint = True # original file is over http if SCHEME_RE.search(filename): # do a url join so relative paths work req_path = urllib_parse.urljoin(filename, req_path) # original file and nested file are paths elif not SCHEME_RE.search(req_path): # do a join so relative paths work req_path = os.path.join(os.path.dirname(filename), req_path) # TODO: Why not use `comes_from='-r {} (line {})'` here as well? parser = parse_requirements(req_path, finder, comes_from, options, session, constraint=nested_constraint, wheel_cache=wheel_cache) for req in parser: yield req # percolate hash-checking option upward elif opts.require_hashes: options.require_hashes = opts.require_hashes # set finder options elif finder: if opts.allow_external: warnings.warn( "--allow-external has been deprecated and will be removed in " "the future. Due to changes in the repository protocol, it no " "longer has any effect.", RemovedInPip10Warning, ) if opts.allow_all_external: warnings.warn( "--allow-all-external has been deprecated and will be removed " "in the future. Due to changes in the repository protocol, it " "no longer has any effect.", RemovedInPip10Warning, ) if opts.allow_unverified: warnings.warn( "--allow-unverified has been deprecated and will be removed " "in the future. Due to changes in the repository protocol, it " "no longer has any effect.", RemovedInPip10Warning, ) if opts.index_url: finder.index_urls = [opts.index_url] if opts.use_wheel is False: finder.use_wheel = False pip9.index.fmt_ctl_no_use_wheel(finder.format_control) if opts.no_index is True: finder.index_urls = [] if opts.extra_index_urls: finder.index_urls.extend(opts.extra_index_urls) if opts.find_links: # FIXME: it would be nice to keep track of the source # of the find_links: support a find-links local path # relative to a requirements file. value = opts.find_links[0] req_dir = os.path.dirname(os.path.abspath(filename)) relative_to_reqs_file = os.path.join(req_dir, value) if os.path.exists(relative_to_reqs_file): value = relative_to_reqs_file finder.find_links.append(value) if opts.pre: finder.allow_all_prereleases = True if opts.process_dependency_links: finder.process_dependency_links = True if opts.trusted_hosts: finder.secure_origins.extend( ("*", host, "*") for host in opts.trusted_hosts)