def open_with_auth2(url):
    """Open a urllib request, handling HTTP Basic authentication.

    In this version, user-agent is ignored.

    For ``http``/``https`` URLs, ``user:password@`` userinfo found in the
    network location is removed from the URL that is requested and sent as
    an ``Authorization: Basic ...`` header instead.

    :param url: URL to open; may embed ``user:password@`` before the host.
    :return: whatever ``urlopen`` returns.  When credentials were used and
        the final URL has the same scheme and host as the request, the
        response's ``url`` attribute is rewritten to carry the original
        network location (including the userinfo) again.
    """
    import base64

    scheme, netloc, path, params, query, frag = urlparse(url)

    if scheme in ('http', 'https'):
        auth, host = splituser(netloc)
    else:
        auth = None

    if auth:
        # str.encode('base64') is a Python 2 only codec; b64encode works on
        # bytes and never inserts line breaks, so no stripping is needed.
        token = base64.b64encode(unquote(auth).encode('utf-8'))
        auth = "Basic " + token.decode('ascii')
        new_url = urlunparse((scheme, host, path, params, query, frag))
        request = Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = Request(url)

    # Deliberately no User-Agent header in this variant.
    fp = urlopen(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse(fp.url)
        if s2 == scheme and h2 == host:
            fp.url = urlunparse((s2, netloc, path2, param2, query2, frag2))

    return fp
def _ensure_api(self):
    """Populate ``self._api_url`` and ``self._api_auth`` on first use.

    Does nothing when ``self._api_url`` is already set.  Otherwise the
    publishing credentials for ``self._website`` in
    ``self._resource_group`` are fetched through ``self._wsc``, and the
    returned ``scm_uri`` is split into its userinfo (stored in
    ``self._api_auth`` as the result of ``splitpasswd``) and the same URI
    without userinfo (stored in ``self._api_url``).
    """
    if not self._api_url:
        credentials = self._wsc.sites.list_site_publishing_credentials(
            self._resource_group, self._website
        ).result()

        uri_scheme, authority, uri_path, uri_query, uri_fragment = urlsplit(
            credentials.scm_uri
        )
        userinfo, bare_host = splituser(authority)

        self._api_auth = splitpasswd(userinfo)
        self._api_url = urlunsplit(
            (uri_scheme, bare_host, uri_path, uri_query, uri_fragment)
        )
def get_host_info(self, host):
    """Split a host specification into host, auth headers and x509 info.

    :param host: either a ``"[user[:password]@]host"`` string, or a
        ``(host_string, x509)`` tuple.
    :return: a ``(host, extra_headers, x509)`` tuple.  ``host`` has any
        userinfo removed; ``extra_headers`` is a one-element list holding
        an ``Authorization: Basic ...`` header when userinfo was present
        and ``None`` otherwise; ``x509`` is the second tuple element, or an
        empty dict when a plain string was given.
    """
    x509 = {}
    if isinstance(host, tuple):
        host, x509 = host

    auth, host = splituser(host)
    if not auth:
        return host, None, x509

    # base64.encodestring() was removed in Python 3.9 and only accepted
    # bytes.  b64encode() emits no line breaks, so the whitespace-stripping
    # pass that encodestring() required is no longer needed.
    token = base64.b64encode(unquote(auth).encode("utf-8")).decode("ascii")
    extra_headers = [
        ("Authorization", "Basic " + token)
    ]
    return host, extra_headers, x509
def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib request, handling HTTP authentication.

    Credentials are taken from the URL's userinfo (``http``/``https`` only)
    or, failing that, from ``PyPIConfig().find_credential(url)``.  When
    credentials are available they are sent as an ``Authorization`` header
    and the request URL is rebuilt without userinfo.

    :param url: URL to open.
    :param opener: callable taking a ``Request`` and returning a response;
        defaults to ``urllib.request.urlopen``.
    :return: the response returned by ``opener``.  If credentials were used
        and the response URL has the same scheme and host as the request,
        its ``url`` attribute is rewritten with the original network
        location so that links found on the page keep working.
    :raises http_client.InvalidURL: if the network location ends with ``:``.
    """
    scheme, netloc, path, params, query, frag = urllib.parse.urlparse(url)

    # Double scheme does not raise on Mac OS X as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http_client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, host = splituser(netloc)
    else:
        # Bind host on this path too: credentials may still be supplied by
        # .pypirc below, and the request-building code then reads host.
        auth, host = None, netloc

    if not auth:
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        auth = "Basic " + _encode_auth(auth)
        parts = scheme, host, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == host:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp
def _download_svn(self, url, filename):
    """Check out ``url`` with the ``svn`` command-line client.

    Any ``#fragment`` is dropped from the URL first.  For ``svn:`` URLs
    carrying ``user[:password]@`` userinfo, the credentials are passed to
    ``svn`` as ``--username``/``--password`` options.

    :param url: Subversion URL to check out.
    :param filename: destination path handed to ``svn checkout``.
    :return: ``filename``.
    """
    import subprocess

    url = url.split('#', 1)[0]  # remove any fragment for svn's sake
    creds = []
    if url.lower().startswith('svn:') and '@' in url:
        scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
        if not netloc and path.startswith('//') and '/' in path[2:]:
            netloc, path = path[2:].split('/', 1)
            auth, host = splituser(netloc)
            if auth:
                if ':' in auth:
                    user, pw = auth.split(':', 1)
                    creds = ["--username=" + user, "--password=" + pw]
                else:
                    creds = ["--username=" + auth]
                # NOTE(review): the original text between the single-user
                # ``--username=`` branch and the log call was redacted in
                # the source.  Rebuilding the URL without userinfo (and the
                # nesting of this whole branch) is a reconstruction --
                # confirm against the upstream implementation.
                parts = scheme, host, path, p, q, f
                url = urllib.parse.urlunparse(parts)
    self.info("Doing subversion checkout from %s to %s", url, filename)
    # The URL comes from outside, so pass an argument list instead of
    # letting a shell parse a formatted command string.  As with the
    # previous os.system() call, the exit status is not inspected.
    subprocess.call(["svn", "checkout"] + creds + ["-q", url, filename])
    return filename
def parse_proxy(proxy):
    """Break a proxy specification into its components.

    Mirrors ``_parse_proxy`` from urllib.  ``proxy`` may be a bare
    authority (``[user[:password]@]host[:port]``) or a URL that has an
    authority component; anything after the authority is ignored.

    :return: ``(scheme, user, password, hostport)``; the first three items
        may be ``None``.
    :raises ValueError: if ``proxy`` is a URL without an authority, i.e.
        the part after the scheme starts with a single ``/``.
    """
    scheme, remainder = splittype(proxy)

    if remainder.startswith("//"):
        # URL form.  For RFC 3986-compliant URLs the path is empty or
        # starts with '/', so the authority ends at the next slash.
        cut = remainder.find("/", 2)
        authority = remainder[2:cut] if cut != -1 else remainder[2:]
    elif remainder.startswith("/"):
        raise ValueError("proxy URL with no authority: %r" % proxy)
    else:
        # Bare authority: whatever splittype() took for a scheme is not one.
        scheme, authority = None, proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)

    return scheme, user, password, hostport
def getUrlFd(url, headers=None, data=None, timeout=None):
    """getUrlFd(url, headers=None, data=None, timeout=None)

    Opens the given url and returns a file object.  Headers and data are
    a dict and string, respectively, as per urllib.request.Request's
    arguments.

    ``url`` may also be a ready-made ``Request`` object, in which case
    ``headers`` is ignored and ``data`` is set on that request.  For string
    URLs, ``user:password@`` userinfo is stripped from the URL and sent as
    a Basic ``Authorization`` header, and any fragment is dropped.

    Raises ``Error`` for timeouts, socket errors, invalid URLs, HTTP/URL
    errors and unrecognised URL types."""
    if headers is None:
        headers = defaultHeaders
    if minisix.PY3 and isinstance(data, str):
        data = data.encode()
    try:
        if not isinstance(url, Request):
            (scheme, loc, path, query, frag) = urlsplit(url)
            (user, host) = splituser(loc)
            url = urlunsplit((scheme, host, path, query, ''))
            request = Request(url, headers=headers, data=data)
            if user:
                # b64encode() needs bytes and returns bytes; convert on
                # both sides so the header reads "Basic <token>" rather
                # than failing (or embedding a b'...' repr) on Python 3.
                if not isinstance(user, bytes):
                    user = user.encode('utf-8')
                token = base64.b64encode(user).decode('ascii')
                request.add_header('Authorization', 'Basic %s' % token)
        else:
            request = url
            # Request.add_data() was removed in Python 3.4; fall back to
            # the data attribute where the method no longer exists.
            if hasattr(request, 'add_data'):
                request.add_data(data)
            else:
                request.data = data
        fd = urlopen(request, timeout=timeout)
        return fd
    except socket.timeout:
        raise Error(TIMED_OUT)
    except sockerrors as e:
        raise Error(strError(e))
    except InvalidURL as e:
        raise Error('Invalid URL: %s' % e)
    except HTTPError as e:
        raise Error(strError(e))
    except URLError as e:
        raise Error(strError(e.reason))
    # Raised when urllib doesn't recognize the url type
    except ValueError as e:
        raise Error(strError(e))
def check_long_url(long_url):
    """Validate a long URL, raising when it is not acceptable.

    A URL is rejected when:

    1. its scheme (lower-cased) is not in ``SCHEMES``;
    2. its host is a literal IPv4 or IPv6 address.

    :param long_url: the URL to check.
    :return: ``None`` when the URL passes both checks.
    :raises ValueError: when either check fails.
    """
    parts = parse.urlsplit(long_url)

    # check scheme
    if parts.scheme.lower() not in SCHEMES:
        raise ValueError("invalid scheme: {}, expect {}".format(
            parts.scheme.lower(), SCHEMES))

    # check host: drop userinfo and port first
    _userinfo, hostport = parse.splituser(parts.netloc)
    host, _port = parse.splitport(hostport)

    # test for IPv4
    try:
        socket.inet_pton(socket.AF_INET, host)
    except socket.error:
        pass
    else:
        raise ValueError("invalid host in {}, unexpected IPv4 {}".format(
            long_url, host))

    # test for IPv6 (literal may be wrapped in square brackets)
    try:
        ipv6 = host.lstrip('[').rstrip(']')
        socket.inet_pton(socket.AF_INET6, ipv6)
    except socket.error:
        # host isn't IP (expected)
        return
    raise ValueError("invalid host in {}, unexpected IPv6 {}".format(
        long_url, ipv6))
def ftp_open(self, req):
    """Open an FTP request, honouring an optional ``Range`` header.

    The request's host is split into host/port/user/password, resolved
    with ``socket.gethostbyname`` and passed to ``self.connect_ftp``.  The
    requested byte range, if any, is obtained from the request headers via
    ``range_header_to_tuple``; its start is passed to ``retrfile`` as the
    restart offset and, when the range has an explicit end, the returned
    file object is wrapped in ``RangeableFileObject``.

    :param req: the request object; its ``host``, ``selector``,
        ``headers`` and ``get_full_url()`` are used.
    :return: an ``addinfourl`` wrapping the data stream, with
        ``Content-Type``/``Content-Length`` headers when known.
    :raises IOError: if no host is given, or on any ``ftplib`` error.
    :raises URLError: if the host name cannot be resolved.
    :raises RangeError: if the requested range cannot be satisfied.
    """
    import email

    # Request.get_host()/get_selector() were removed in Python 3.4; the
    # attributes carry the same values.
    host = req.host
    if not host:
        raise IOError('ftp error', 'no host given')
    host, port = splitport(host)
    if port is None:
        port = ftplib.FTP_PORT
    else:
        port = int(port)

    # username/password handling
    user, host = splituser(host)
    if user:
        user, passwd = splitpasswd(user)
    else:
        passwd = None
    host = unquote(host)
    user = unquote(user or '')
    passwd = unquote(passwd or '')

    try:
        host = socket.gethostbyname(host)
    except socket.error as msg:
        raise URLError(msg)

    path, attrs = splitattr(req.selector)
    # Build a real list: a lazy map object cannot be sliced or indexed.
    dirs = [unquote(part) for part in path.split('/')]
    dirs, file = dirs[:-1], dirs[-1]
    if dirs and not dirs[0]:
        dirs = dirs[1:]

    try:
        fw = self.connect_ftp(user, passwd, host, port, dirs)
        type = 'I' if file else 'D'
        for attr in attrs:
            # Each attribute is "name=value".  splitattr() splits on ';'
            # and returns a list as its second item, so it could never
            # match the type letters below; split on '=' instead.
            attr, _, value = attr.partition('=')
            if attr.lower() == 'type' and \
               value in ('a', 'A', 'i', 'I', 'd', 'D'):
                type = value.upper()

        # -- range support modifications start here
        rest = None
        range_tup = range_header_to_tuple(req.headers.get('Range', None))
        assert range_tup != ()
        if range_tup:
            (fb, lb) = range_tup
            if fb > 0:
                rest = fb
        # -- range support modifications end here

        fp, retrlen = fw.retrfile(file, type, rest)

        # -- range support modifications start here
        if range_tup:
            (fb, lb) = range_tup
            if lb == '':
                # Open-ended range: the end is the file length.
                if retrlen is None or retrlen == 0:
                    raise RangeError(
                        9,
                        'Requested Range Not Satisfiable due to unobtainable file length.'
                    )
                lb = retrlen
                retrlen = lb - fb
                if retrlen < 0:
                    # beginning of range is larger than file
                    raise RangeError(9, 'Requested Range Not Satisfiable')
            else:
                retrlen = lb - fb
                fp = RangeableFileObject(fp, (0, retrlen))
        # -- range support modifications end here

        headers = ""
        mtype = mimetypes.guess_type(req.get_full_url())[0]
        if mtype:
            headers += "Content-Type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            headers += "Content-Length: %d\n" % retrlen
        # mimetools does not exist on Python 3; the email package parses
        # the same header block.
        headers = email.message_from_string(headers)
        return addinfourl(fp, headers, req.get_full_url())
    except ftplib.all_errors as msg:
        raise IOError('ftp error', msg).with_traceback(sys.exc_info()[2])
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, rest = splittype(proxy)

    if rest.startswith("/"):
        # URL
        if not rest.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'
        slash = rest.find("/", 2)
        stop = None if slash == -1 else slash
        authority = rest[2:stop]
    else:
        # authority
        scheme = None
        authority = proxy

    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
def smb_open(self, req):
    """Open an ``smb://`` request, uploading or downloading one file.

    The request host is split into host/port/user/password, the server's
    machine name is looked up with ``NetBIOS().queryIPForName``, and an
    ``SMBConnection`` is made to it.  The first path segment is used as the
    service (share) name and the rest as the path inside it.  If the
    request carries data it is stored at that path; otherwise the file is
    retrieved into a temporary file from ``self.createTempFile()``.

    :param req: the request; its ``host``, ``selector``, ``data`` and
        ``get_full_url()`` are used.
    :return: an ``addinfourl`` wrapping the downloaded file (or an empty
        stream after an upload) together with the response headers.
    :raises urllib.error.URLError: if no host is given, the host does not
        answer the machine-name query, or anything fails while talking to
        the server.
    """
    global USE_NTLM, MACHINE_NAME

    # Request.get_host()/get_selector()/has_data()/get_data() were removed
    # in Python 3.4; the host/selector/data attributes replace them.
    host = req.host
    if not host:
        raise urllib.error.URLError('SMB error: no host given')
    host, port = splitport(host)
    if port is None:
        port = 139
    else:
        port = int(port)

    # username/password handling
    user, host = splituser(host)
    if user:
        user, passwd = splitpasswd(user)
    else:
        passwd = None
    host = unquote(host)
    user = user or ''
    passwd = passwd or ''
    myname = MACHINE_NAME or self.generateClientMachineName()

    n = NetBIOS()
    names = n.queryIPForName(host)
    if names:
        server_name = names[0]
    else:
        raise urllib.error.URLError('SMB error: Hostname does not reply back with its machine name')

    path, attrs = splitattr(req.selector)
    if path.startswith('/'):
        path = path[1:]
    dirs = [unquote(part) for part in path.split('/')]
    service, path = dirs[0], '/'.join(dirs[1:])

    try:
        conn = SMBConnection(user, passwd, myname, server_name, use_ntlm_v2=USE_NTLM)
        conn.connect(host, port)

        headers = email.message.Message()
        if req.data is not None:
            # Upload: nothing is returned to the caller but an empty body.
            conn.storeFile(service, path, req.data)
            headers.add_header('Content-length', '0')
            fp = BytesIO(b"")
        else:
            fp = self.createTempFile()
            _file_attrs, retrlen = conn.retrieveFile(service, path, fp)
            fp.seek(0)

            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers.add_header('Content-type', mtype)
            if retrlen is not None and retrlen >= 0:
                headers.add_header('Content-length', '%d' % retrlen)

        return addinfourl(fp, headers, req.get_full_url())
    except Exception as ex:
        raise urllib.error.URLError('smb error: %s' % ex).with_traceback(sys.exc_info()[2])
def smb_open(self, req):
    """Open an ``smb://`` request, uploading or downloading one file.

    The host part of the URL has the form
    ``[[domain;]user[:password]@][server_name,]host[:port]``.  When no
    ``server_name`` is given, the machine name is looked up with
    ``NetBIOS().queryIPForName``.  The first path segment is used as the
    service (share) name and the rest as the path inside it.  If the
    request carries data it is stored at that path; otherwise the file is
    retrieved into a temporary file from ``self.createTempFile()``.

    :param req: the request; its ``host``, ``selector``, ``data`` and
        ``get_full_url()`` are used.
    :return: an ``addinfourl`` wrapping the downloaded file (or an empty
        stream after an upload) together with the response headers.
    :raises urllib.error.URLError: if no host is given, the host does not
        answer the machine-name query, or anything fails while talking to
        the server.
    """
    global USE_NTLM, MACHINE_NAME

    if not req.host:
        raise urllib.error.URLError('SMB error: no host given')
    host, port = splitport(req.host)
    if port is None:
        port = 139
    else:
        port = int(port)

    # username/password handling
    user, host = splituser(host)
    if user:
        user, passwd = splitpasswd(user)
    else:
        passwd = None
    host = unquote(host)
    user = user or ''

    domain = ''
    if ';' in user:
        domain, user = user.split(';', 1)

    passwd = passwd or ''
    myname = MACHINE_NAME or self.generateClientMachineName()

    # Split on the first comma only, so that an extra comma in the host
    # part cannot break the two-name unpacking.
    if ',' in host:
        server_name, host = host.split(',', 1)
    else:
        server_name = None

    if server_name is None:
        n = NetBIOS()
        names = n.queryIPForName(host)
        if names:
            server_name = names[0]
        else:
            raise urllib.error.URLError(
                'SMB error: Hostname does not reply back with its machine name'
            )

    path, attrs = splitattr(req.selector)
    if path.startswith('/'):
        path = path[1:]
    dirs = [unquote(part) for part in path.split('/')]
    service, path = dirs[0], '/'.join(dirs[1:])

    try:
        conn = SMBConnection(user, passwd, myname, server_name,
                             domain=domain, use_ntlm_v2=USE_NTLM)
        conn.connect(host, port)

        headers = email.message.Message()
        if req.data:
            # Upload: nothing is returned to the caller but an empty body.
            conn.storeFile(service, path, req.data)
            headers.add_header('Content-length', '0')
            fp = BytesIO(b"")
        else:
            fp = self.createTempFile()
            _file_attrs, retrlen = conn.retrieveFile(service, path, fp)
            fp.seek(0)

            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers.add_header('Content-type', mtype)
            if retrlen is not None and retrlen >= 0:
                headers.add_header('Content-length', '%d' % retrlen)

        return addinfourl(fp, headers, req.get_full_url())
    except Exception as ex:
        raise urllib.error.URLError('smb error: %s' % ex).with_traceback(
            sys.exc_info()[2])
def open_http(self, url, data=None):
    """Use HTTP protocol.

    :param url: either the part of the URL after ``http:`` as a string, or
        a ``(proxy_host, full_url)`` pair when the request goes through a
        proxy.  Userinfo on the target host and on the proxy host is sent
        as Basic ``Authorization`` / ``Proxy-Authorization`` headers.
    :param data: request body; when not ``None`` a POST is sent with a
        form-urlencoded content type, otherwise a GET.
    :return: an ``addinfourl`` for a 200 response, otherwise whatever
        ``self.http_error`` returns.
    :raises IOError: if no host can be determined or the status line of
        the reply is bad.
    """
    import base64
    from eventlib.green import httplib

    def basic_token(credentials):
        # b64encode() takes and returns bytes; convert both ways so the
        # header value is plain text instead of a TypeError or b'...' repr.
        if not isinstance(credentials, bytes):
            credentials = credentials.encode('utf-8')
        return base64.b64encode(credentials).decode('ascii')

    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = unquote(host)
        realhost = host
    else:
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                # Re-create the selector without the userinfo.
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if proxy_bypass(realhost):
                host = realhost

    if not host:
        raise IOError('http error', 'no host given')

    proxy_auth = basic_token(proxy_passwd) if proxy_passwd else None
    auth = basic_token(user_passwd) if user_passwd else None

    h = httplib.HTTP(host)
    if data is not None:
        h.putrequest('POST', selector)
        h.putheader('Content-Type', 'application/x-www-form-urlencoded')
        h.putheader('Content-Length', '%d' % len(data))
    else:
        h.putrequest('GET', selector)
    if proxy_auth:
        h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth:
        h.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        h.putheader('Host', realhost)
    for args in self.addheaders:
        h.putheader(*args)
    h.endheaders()
    if data is not None:
        h.send(data)

    errcode, errmsg, headers = h.getreply()
    if errcode == -1:
        # something went wrong with the HTTP status line
        raise IOError('http protocol error', 0, 'got a bad status line', None)
    fp = h.getfile()
    if errcode == 200:
        return addinfourl(fp, headers, "http:" + url)
    if data is None:
        return self.http_error(url, fp, errcode, errmsg, headers)
    return self.http_error(url, fp, errcode, errmsg, headers, data)
def open_ftp(self, url):
    """Use FTP protocol.

    Connections are cached in ``self.ftpcache`` under a
    ``(user, host, port, directory)`` key; when the cache holds more than
    ``MAXFTPCACHE`` entries, every entry except the one about to be used
    is closed and dropped.

    :param url: the part of the URL after ``ftp:``; must be a string
        (proxying is not supported).
    :return: an ``addinfourl`` wrapping the data stream, with
        ``Content-Type``/``Content-Length`` headers when known.
    :raises IOError: if ``url`` is not a string, no host is given, or an
        FTP error occurs.
    """
    if not isinstance(url, str):
        raise IOError(
            'ftp error',
            'proxy support for ftp protocol currently not implemented')
    # mimetools no longer exists on Python 3; the email package parses the
    # same header block and needs no StringIO wrapper.
    import email
    import mimetypes

    host, path = splithost(url)
    if not host:
        raise IOError('ftp error', 'no host given')
    host, port = splitport(host)
    user, host = splituser(host)
    if user:
        user, passwd = splitpasswd(user)
    else:
        passwd = None
    host = unquote(host)
    user = unquote(user or '')
    passwd = unquote(passwd or '')
    host = socket.gethostbyname(host)
    if not port:
        from eventlib.green import ftplib
        port = ftplib.FTP_PORT
    else:
        port = int(port)

    path, attrs = splitattr(path)
    path = unquote(path)
    dirs = path.split('/')
    dirs, file = dirs[:-1], dirs[-1]
    if dirs and not dirs[0]:
        dirs = dirs[1:]
    if dirs and not dirs[0]:
        dirs[0] = '/'
    key = user, host, port, '/'.join(dirs)

    # XXX thread unsafe!
    if len(self.ftpcache) > MAXFTPCACHE:
        # Prune the cache, rather arbitrarily
        for k in list(self.ftpcache.keys()):
            if k != key:
                v = self.ftpcache[k]
                del self.ftpcache[k]
                v.close()

    try:
        if key not in self.ftpcache:
            self.ftpcache[key] = \
                ftpwrapper(user, passwd, host, port, dirs)
        # No file name means a directory listing; otherwise binary mode
        # unless a ";type=" attribute says differently.
        if not file:
            type = 'D'
        else:
            type = 'I'
        for attr in attrs:
            attr, value = splitvalue(attr)
            if attr.lower() == 'type' and \
               value in ('a', 'A', 'i', 'I', 'd', 'D'):
                type = value.upper()
        (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
        mtype = mimetypes.guess_type("ftp:" + url)[0]
        headers = ""
        if mtype:
            headers += "Content-Type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            headers += "Content-Length: %d\n" % retrlen
        headers = email.message_from_string(headers)
        return addinfourl(fp, headers, "ftp:" + url)
    except ftperrors() as msg:
        raise IOError('ftp error', msg).with_traceback(sys.exc_info()[2])
def ftp_open(self, req):
    """
    When ftp requests are made using this handler, this function gets
    called at some point, and it in turn calls the ``connect_ftp`` method.
    In this subclass's reimplementation of ``connect_ftp``, the FQDN of
    the request's host is needed for looking up login credentials in the
    password manager. However, by the time ``connect_ftp`` is called, that
    information has been stripped away, and the host argument passed to
    ``connect_ftp`` contains only the host's IP address instead of the
    FQDN. This reimplementation of ``ftp_open``, which is little more than
    a copy-and-paste from the superclass's implementation, captures the
    original host FQDN before it is replaced with the IP address and saves
    it for later use (in ``self.last_req_host``).

    This reimplementation also tries to make the file size appear in the
    response header by querying for it directly. For some FTP servers the
    original implementation should handle this (``retrlen`` should contain
    the file size). However, for others this can fail silently due to the
    server response not matching an anticipated regular expression.

    The size query is best-effort: it is skipped for directory listings
    (no file name) and a refusal by the server does not abort the
    transfer.

    :param req: the request; its ``host``, ``selector``, ``timeout`` and
        ``full_url`` are used.
    :return: an ``addinfourl`` wrapping the data stream, with
        ``Content-type``/``Content-length`` headers when known.
    :raises URLError: if no host is given, the host cannot be resolved, or
        an FTP error occurs.
    """

    import sys
    import email
    import socket
    from urllib.error import URLError
    from urllib.parse import splitattr, splitpasswd, splitvalue
    from urllib.response import addinfourl

    ####################################################
    #  COPIED FROM FTPHandler.ftp_open (PYTHON 3.6.6)  #
    #  WITH JUST A FEW ADDITIONS                       #
    ####################################################

    import ftplib
    import mimetypes
    host = req.host
    if not host:
        raise URLError('ftp error: no host given')
    host, port = splitport(host)
    if port is None:
        port = ftplib.FTP_PORT
    else:
        port = int(port)

    # username/password handling
    user, host = splituser(host)
    if user:
        user, passwd = splitpasswd(user)
    else:
        passwd = None
    host = unquote(host)
    user = user or ''
    passwd = passwd or ''

    ############################################
    #  DIFFERENT FROM FTPHandler.ftp_open
    # save the host FQDN for later
    self.last_req_host = host
    ############################################
    try:
        host = socket.gethostbyname(host)
    except OSError as msg:
        raise URLError(msg)
    path, attrs = splitattr(req.selector)
    dirs = path.split('/')
    dirs = list(map(unquote, dirs))
    dirs, file = dirs[:-1], dirs[-1]
    if dirs and not dirs[0]:
        dirs = dirs[1:]
    try:
        fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
        type = 'I' if file else 'D'
        for attr in attrs:
            attr, value = splitvalue(attr)
            if attr.lower() == 'type' and \
               value in ('a', 'A', 'i', 'I', 'd', 'D'):
                type = value.upper()
        ############################################
        #  DIFFERENT FROM FTPHandler.ftp_open
        # The size is only a fallback for the Content-length header, so
        # a server that refuses the query must not fail the request.
        # Connection-level errors still reach the handler below.
        size = None
        if file:
            try:
                size = fw.ftp.size(file)
            except ftplib.Error:
                size = None
        ############################################
        fp, retrlen = fw.retrfile(file, type)
        headers = ""
        mtype = mimetypes.guess_type(req.full_url)[0]
        if mtype:
            headers += "Content-type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            headers += "Content-length: %d\n" % retrlen
        ############################################
        #  DIFFERENT FROM FTPHandler.ftp_open
        elif size is not None and size >= 0:
            headers += "Content-length: %d\n" % size
        ############################################
        headers = email.message_from_string(headers)
        return addinfourl(fp, headers, req.full_url)
    except ftplib.all_errors as exp:
        exc = URLError('ftp error: %r' % exp)
        raise exc.with_traceback(sys.exc_info()[2])
file.close() os.unlink(filename) raise DistutilsError("Unexpected HTML page found at "+url) def _download_svn(self, url, filename): url = url.split('#',1)[0] # remove any fragment for svn's sake creds = '' if url.lower().startswith('svn:') and '@' in url: <<<<<<< HEAD scheme, netloc, path, p, q, f = urlparse(url) ======= scheme, netloc, path, p, q, f = urllib.parse.urlparse(url) >>>>>>> 54eef0be98b1b67c8507db91f4cfa90b64991027 if not netloc and path.startswith('//') and '/' in path[2:]: netloc, path = path[2:].split('/',1) auth, host = splituser(netloc) if auth: if ':' in auth: user, pw = auth.split(':',1) creds = " --username=%s --password=%s" % (user, pw) else: creds = " --username="******"Doing subversion checkout from %s to %s", url, filename) os.system("svn checkout%s -q %s %s" % (creds, url, filename))