def testscheme_open(self, req):
    try:
        selector = req.get_selector()
        if selector == u'/ws_newcompass.asmx?WSDL':
            return urllib.addinfourl(
                pkg_resources.resource_stream(__name__, 'tests/testdata/wsdl.xml'),
                httplib.HTTPMessage(open('/dev/null')),
                req.get_full_url(),
                200
            )
        elif selector == u'/ws_newcompass.asmx':
            soapResponse = urlparse.urlparse(
                req.get_header('Soapaction')).path.strip('"').split('/')[-1] + '.xml'
            return urllib.addinfourl(
                pkg_resources.resource_stream(__name__, 'tests/testdata/' + soapResponse),
                httplib.HTTPMessage(open('/dev/null')),
                req.get_full_url(),
                200
            )
        elif selector == u'/biomuta.tsv':
            return urllib2.addinfourl(
                pkg_resources.resource_stream(__name__, 'tests/testdata/Biomuta.tsv'),
                httplib.HTTPMessage(open('/dev/null')),
                req.get_full_url(),
                200
            )
        else:
            raise urllib2.URLError('Not found')
    except Exception:
        raise urllib2.URLError('Not found')
def fake_urlopen(self, url):
    """Fake urlopen using test client"""
    if 'example' in url:
        response = cStringIO.StringIO('')
        return addinfourl(response, {'X-Pingback': '/xmlrpc.php'}, url)
    elif 'localhost' in url:
        response = cStringIO.StringIO('<link rel="pingback" href="/xmlrpc/">')
        return addinfourl(response, {}, url)
def fake_urlopen(self, url):
    """Fake urlopen using test client"""
    if 'example' in url:
        response = cStringIO.StringIO('')
        return addinfourl(response, {'X-Pingback': '/xmlrpc.php'}, url)
    else:
        response = cStringIO.StringIO(self.client.get(url).content)
        return addinfourl(response, {}, url)
def fake_urlopen(self, url):
    """Fake urlopen using test client"""
    if 'example' in url:
        response = cStringIO.StringIO('')
        return addinfourl(response, {'X-Pingback': '/xmlrpc.php',
                                     'Content-Type': 'text/html'}, url)
    elif 'localhost' in url:
        response = cStringIO.StringIO('<link rel="pingback" href="/xmlrpc/">')
        return addinfourl(response, {'Content-Type': 'text/xhtml'}, url)
    elif 'google' in url:
        response = cStringIO.StringIO('PNG CONTENT')
        return addinfourl(response, {'content-type': 'image/png'}, url)
    elif 'error' in url:
        raise URLError('Invalid resource')
def test_returns_response_when_successful_response(self, urlopen):
    resp = addinfourl(StringIO(u"mock_content"), "mock headers",
                      url="http://www.example.com/", code="200")
    urlopen.return_value = resp
    api_stub = ClientStub()
    response = api_stub.do_something()
    self.assertEqual(resp, response)
def decode(page):
    """Gunzip or deflate a compressed page."""
    log.debug(LOG_CHECK, "page info %d %s", page.code, str(page.info()))
    encoding = page.info().get("Content-Encoding")
    if encoding in ('gzip', 'x-gzip', 'deflate'):
        # cannot seek in socket descriptors, so must get content now
        content = page.read()
        try:
            if encoding == 'deflate':
                fp = StringIO(zlib.decompress(content))
            else:
                fp = gzip.GzipFile('', 'rb', 9, StringIO(content))
        except zlib.error as msg:
            log.debug(LOG_CHECK, "uncompressing had error "
                      "%s, assuming non-compressed content", str(msg))
            fp = StringIO(content)
        # remove content-encoding header
        headers = httplib.HTTPMessage(StringIO(""))
        ceheader = re.compile(r"(?i)content-encoding:")
        for h in page.info().keys():
            if not ceheader.match(h):
                headers[h] = page.info()[h]
        newpage = urllib.addinfourl(fp, headers, page.geturl())
        newpage.code = page.code
        newpage.msg = page.msg
        return newpage
    return page
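# A minimal usage sketch for a decode() helper like the one above. The URL and
# the Accept-Encoding header are illustrative, and decode() is assumed to be
# in scope from the surrounding module; this is not the original author's code.
import urllib2

req = urllib2.Request('http://example.com/',
                      headers={'Accept-Encoding': 'gzip'})
page = decode(urllib2.urlopen(req))  # addinfourl over the decompressed body
body = page.read()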
def open_local_file(self, req):
    import mimetypes
    import mimetools
    host = req.get_host()
    file = req.get_selector()
    localfile = urllib.url2pathname(file)
    stats = os.stat(localfile)
    size = stats[stat.ST_SIZE]
    modified = rfc822.formatdate(stats[stat.ST_MTIME])
    mtype = mimetypes.guess_type(file)[0]
    if host:
        host, port = urllib.splitport(host)
        if port or socket.gethostbyname(host) not in self.get_names():
            raise urllib2.URLError("file not on local host")
    fo = open(localfile, "rb")
    brange = req.headers.get("Range", None)
    brange = range_header_to_tuple(brange)
    assert brange != ()
    if brange:
        (fb, lb) = brange
        if lb == "":
            lb = size
        if fb < 0 or fb > size or lb > size:
            raise RangeError("Requested Range Not Satisfiable")
        size = lb - fb
        fo = RangeableFileObject(fo, (fb, lb))
    headers = mimetools.Message(
        StringIO(
            "Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n"
            % (mtype or "text/plain", size, modified)
        )
    )
    return urllib.addinfourl(fo, headers, "file:" + file)
def open_local_file(self, req):
    import email.utils
    import mimetypes
    host = req.get_host()
    filename = req.get_selector()
    localfile = url2pathname(filename)
    try:
        stats = os.stat(localfile)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(filename)[0]
        headers = mimetools.Message(
            StringIO(
                "Content-type: %s\nContent-length: %d\nLast-modified: %s\n"
                % (mtype or "text/plain", size, modified)
            )
        )
        if host:
            host, port = splitport(host)
        if not host or (not port and _safe_gethostbyname(host) in self.get_names()):
            if host:
                origurl = "file://" + host + filename
            else:
                origurl = "file://" + filename
            return addinfourl(open(localfile, "rb"), headers, origurl)
    except OSError, msg:
        # urllib2 users shouldn't expect OSErrors coming from urlopen()
        raise URLError(msg)
def open_local_file(self, req):
    try:
        import email.utils as emailutils
    except ImportError:
        # python 2.4
        import email.Utils as emailutils
    import mimetypes
    host = req.get_host()
    file = req.get_selector()
    localfile = url2pathname(file)
    try:
        stats = os.stat(localfile)
        size = stats.st_size
        modified = emailutils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(file)[0]
        headers = mimetools.Message(StringIO(
            'Content-type: %s\nContent-length: %d\nLast-modified: %s\n'
            % (mtype or 'text/plain', size, modified)))
        if host:
            host, port = splitport(host)
        if not host or \
                (not port and socket.gethostbyname(host) in self.get_names()):
            return addinfourl(open(localfile, 'rb'), headers, 'file:' + file)
    except OSError, msg:
        # urllib2 users shouldn't expect OSErrors coming from urlopen()
        raise URLError(msg)
def decode(page):
    """Gunzip or deflate a compressed page."""
    encoding = page.info().get("Content-Encoding")
    # note: some servers send content encoding gzip if file ends with ".gz"
    # but we don't want to decompress such files
    if encoding in ('gzip', 'x-gzip', 'deflate') and \
            not page.geturl().endswith(".gz"):
        # cannot seek in socket descriptors, so must get content now
        content = page.read()
        if encoding == 'deflate':
            fp = StringIO.StringIO(zlib.decompress(content))
        else:
            fp = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(content))
        # remove content-encoding header
        headers = {}
        ceheader = re.compile(r"(?i)content-encoding:")
        for h in page.info().keys():
            if not ceheader.match(h):
                headers[h] = page.info()[h]
        newpage = urllib.addinfourl(fp, headers, page.geturl())
        if hasattr(page, "code"):
            # python 2.4 compatibility
            newpage.code = page.code
        if hasattr(page, "msg"):
            # python 2.4 compatibility
            newpage.msg = page.msg
        page = newpage
    return page
def do_open(self, http_class, req):
    host = req.get_host()
    if not host:
        raise URLError('no host given')
    try:
        h = http_class(host)  # will parse host:port
        if req.has_data():
            data = req.get_data()
            h.putrequest('POST', req.get_selector())
            if not req.headers.has_key('Content-type'):
                h.putheader('Content-type',
                            'application/x-www-form-urlencoded')
            if not req.headers.has_key('Content-length'):
                h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', req.get_selector())
    except socket.error, err:  # was "except socket.error(err):", a syntax bug
        raise URLError(err)
    h.putheader('Host', host)
    for args in self.parent.addheaders:
        h.putheader(*args)
    for k, v in req.headers.items():
        h.putheader(k, v)
    h.endheaders()
    if req.has_data():
        h.send(data)
    code, msg, hdrs = h.getreply()
    fp = h.getfile()
    if code == 200:
        return addinfourl(fp, hdrs, req.get_full_url())
    else:
        return self.parent.error('http', req, fp, code, msg, hdrs)
def _create_urllib_data(self, qt_network_reply):
    qt_network_request = qt_network_reply.request()
    request_url = qt_network_request.url()
    request_headers = {}
    for header_name in qt_network_request.rawHeaderList():
        header = qt_network_request.rawHeader(header_name)
        request_headers.update({header_name.data(): header.data()})
    url = request_url.toEncoded().data()
    self.urllib_request = UrllibRequest(url, headers=request_headers)
    # py2: output_file = StringIO.StringIO()
    output_file = StringIO()
    raw_header_pairs = qt_network_reply.rawHeaderPairs()
    headers = []
    for header in raw_header_pairs:
        hd_string = '%s: %s' % (header[0], header[1])
        # terminate each header line (the original wrote them unseparated)
        output_file.write(hd_string + "\n")
        headers.append(hd_string)
    # blank line between the header block and the body
    output_file.write("\n")
    output_file.write(str(qt_network_reply.sniffed_data))
    headers_mstr = email.message_from_string('\n'.join(headers))
    origurl = qt_network_reply.url().toEncoded().data()
    self.urllib_response = addinfourl(output_file, headers_mstr, origurl)
def open(self, fullurl, data=None, method=None):
    """Use URLopener().open(file) instead of open(file, 'r')."""
    fullurl = unwrap(toBytes(fullurl))
    # percent-encode the url, fixing lame server errors (e.g. spaces
    # within url paths)
    fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
    if self.tempcache and fullurl in self.tempcache:
        filename, headers = self.tempcache[fullurl]
        fp = open(filename, 'rb')
        return addinfourl(fp, headers, fullurl)
    urltype, url = splittype(fullurl)
    if not urltype:
        urltype = 'file'
    if urltype in self.proxies:
        proxy = self.proxies[urltype]
        urltype, proxyhost = splittype(proxy)
        host, selector = splithost(proxyhost)
        url = (host, fullurl)  # Signal special case to open_*()
    else:
        proxy = None
    name = 'open_' + urltype
    self.type = urltype
    name = name.replace('-', '_')
    if not hasattr(self, name):
        if proxy:
            return self.open_unknown_proxy(proxy, fullurl, data)
        else:
            return self.open_unknown(fullurl, data)
    try:
        return getattr(self, name)(url, data, method)
    except socket.error, msg:
        raise IOError, ('socket error', msg), sys.exc_info()[2]
def response_to_twill(self, response):
    """Wrap Django response to work with Twill."""
    path = response.request.get('PATH_INFO')
    url = path and SITE + path.lstrip('/') or path
    headers_msg = '\n'.join('%s: %s' % (k, v) for k, v in response.items())
    headers_msg = StringIO(headers_msg)
    headers = httplib.HTTPMessage(headers_msg)
    io_response = StringIO(response.content)
    urllib_response = addinfourl(io_response, headers, url,
                                 response.status_code)
    urllib_response._headers = headers
    urllib_response._url = url
    urllib_response.msg = u'OK'
    urllib_response.seek = urllib_response.fp.seek
    self.get_browser()._browser._set_response(urllib_response, False)
    self.get_browser().result = ResultWrapper(
        response.status_code, url, response.content)
    self._apply_xhtml()
def decode(page):
    """Gunzip or deflate a compressed page."""
    #print page.info().headers
    encoding = page.info().get("Content-Encoding")
    if encoding in ('gzip', 'x-gzip', 'deflate'):
        from cStringIO import StringIO
        # cannot seek in socket descriptors, so must get content now
        content = page.read()
        if encoding == 'deflate':
            import zlib
            fp = StringIO(zlib.decompress(content))
        else:
            import gzip
            fp = gzip.GzipFile('', 'rb', 9, StringIO(content))
        # remove content-encoding header
        headers = httplib.HTTPMessage(StringIO(""))
        ceheader = re.compile(r"(?i)content-encoding:")
        for h in page.info().keys():
            if not ceheader.match(h):
                headers[h] = page.info()[h]
        newpage = urllib.addinfourl(fp, headers, page.geturl())
        # Propagate code, msg through
        if hasattr(page, 'code'):
            newpage.code = page.code
        if hasattr(page, 'msg'):
            newpage.msg = page.msg
        return newpage
    return page
def http_error_302(self, req, fp, code, msg, headers):
    import urllib
    infourl = urllib.addinfourl(fp, headers, headers["Location"])
    infourl.status = code
    infourl.code = code
    return infourl
def _make_response(self, result, url):
    data = "\r\n".join(["%s: %s" % (k, v) for k, v in result.header_items])
    headers = httplib.HTTPMessage(StringIO(data))
    response = urllib.addinfourl(StringIO(result.data), headers, url)
    code, msg = result.status.split(None, 1)
    response.code, response.msg = int(code), msg
    return response
def text_to_twill(self, text):
    """Wrap text to work with Twill."""
    # was "Content: text-plain; encoding=utf-8", which is not a valid header
    headers_msg = 'Content-Type: text/plain; charset=utf-8\n'
    headers_msg = StringIO(headers_msg)
    headers = httplib.HTTPMessage(headers_msg)
    status_code = 200
    url = 'text://'
    io_response = StringIO(text)
    urllib_response = addinfourl(io_response, headers, url, status_code)
    urllib_response._headers = headers
    urllib_response._url = url
    urllib_response.msg = u'OK'
    urllib_response.seek = urllib_response.fp.seek
    self.get_browser()._browser._factory.set_response(urllib_response)
    self.get_browser().result = ResultWrapper(status_code, url, text)
    self._apply_xhtml()
def exec_open(self, req):
    path = req.get_selector()
    args = path.split("?", 1)
    if len(args) == 1:
        args.append('')
    #print "args ", args
    # Prepare a CGI-like environment
    os.putenv('GATEWAY_INTERFACE', 'CGI/1.1')
    os.putenv('HTTP_ACCEPT_ENCODING', req.headers.get('Accept-encoding'))
    os.putenv('HTTP_USER_AGENT', 'DBS-CGI-Direct-call')
    os.putenv('REQUEST_METHOD', 'POST')
    os.putenv('CONTENT_LENGTH', str(req.headers.get('Content-length')))
    os.putenv('CONTENT_TYPE', req.headers.get('Content-type'))
    os.putenv('QUERY_STRING', args[1])
    os.putenv('REQUEST_URI', path)
    os.putenv('SCRIPT_NAME', args[0])
    os.putenv('SERVER_NAME', 'localhost')
    os.putenv('SERVER_PORT', str(80))
    os.putenv('SERVER_PROTOCOL', 'HTTP/1.1')
    os.putenv('SERVER_SOFTWARE', 'Builtin')
    # Open subprocess and write form data.
    # Note: os.popen2() returns (child_stdin, child_stdout), so despite the
    # names, `r` is the pipe we write to and `w` is the one we read from.
    r, w = os.popen2(args[0])
    r.write(req.get_data())
    r.close()
    # Read back headers, then leave the body to be read
    msg = httplib.HTTPMessage(w, 0)
    msg.fp = None
    return urllib.addinfourl(w, msg, path)
def do_open(self, http_class, req):
    host = req.get_host()
    if not host:
        raise URLError('no host given')

    h = http_class(host, timeout=req.timeout)
    h.set_debuglevel(self._debuglevel)

    headers = dict(req.unredirected_hdrs)
    headers.update(dict((k, v) for k, v in req.headers.items()
                        if k not in headers))
    headers['Connection'] = 'close'
    headers = dict((name.title(), val) for name, val in headers.items())

    if req._tunnel_host:
        tunnel_headers = {}
        proxy_auth_hdr = 'Proxy-Authorization'
        if proxy_auth_hdr in headers:
            tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
            del headers[proxy_auth_hdr]
        h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

    try:
        h.request(req.get_method(), req.get_selector(), req.data, headers)
        try:
            r = h.getresponse(buffering=True)
        except TypeError:
            r = h.getresponse()
    except socket.error as err:
        raise URLError(err)

    fp = socket._fileobject(RecvWrapper(r), close=True)
    resp = addinfourl(fp, r.msg, req.get_full_url())
    resp.code = r.status
    resp.msg = r.reason
    return resp
def open_local_file(self, req):
    import mimetypes
    import email
    host = req.get_host()
    file = req.get_selector()
    localfile = urllib.url2pathname(file)
    stats = os.stat(localfile)
    size = stats[stat.ST_SIZE]
    modified = email.Utils.formatdate(stats[stat.ST_MTIME])
    mtype = mimetypes.guess_type(file)[0]
    if host:
        host, port = urllib.splitport(host)
        if port or socket.gethostbyname(host) not in self.get_names():
            raise urllib2.URLError('file not on local host')
    fo = open(localfile, 'rb')
    brange = req.headers.get('Range', None)
    brange = range_header_to_tuple(brange)
    assert brange != ()
    if brange:
        (fb, lb) = brange
        if lb == '':
            lb = size
        if fb < 0 or fb > size or lb > size:
            raise RangeError('Requested Range Not Satisfiable')
        size = (lb - fb)
        fo = RangeableFileObject(fo, (fb, lb))
    headers = email.message_from_string(
        'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
        (mtype or 'text/plain', size, modified))
    return urllib.addinfourl(fo, headers, 'file:' + file)
def http_error_302(self, req, fp, code, msg, headers):
    infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    infourl.status = code
    infourl.code = code
    # was "'got redirect to + ' + ...", a stray "+" inside the string
    logging.debug('NoRedirectHandler got redirect to ' + headers['Location'])
    self.got_redirect = True
    return infourl
def open_http(self, url, data=None):
    """Use HTTP protocol."""
    import httplib
    user_passwd = None
    if type(url) is type(""):
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = unquote(host)
        realhost = host
    else:
        host, selector = url
        urltype, rest = splittype(selector)
        url = rest
        user_passwd = None
        if string.lower(urltype) != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
        #print "proxy via http:", host, selector
    if not host:
        raise IOError, ('http error', 'no host given')
    if user_passwd:
        import base64
        auth = string.strip(base64.encodestring(user_passwd))
    else:
        auth = None
    h = httplib.HTTP(host)
    if data is not None:
        h.putrequest('POST', selector)
        h.putheader('Content-type', 'application/x-www-form-urlencoded')
        h.putheader('Content-length', '%d' % len(data))
    else:
        h.putrequest('GET', selector)
    for cookie in self.cookies.items():
        h.putheader('Cookie', '%s=%s;' % cookie)
    if auth:
        h.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        h.putheader('Host', realhost)
    for args in self.addheaders:
        apply(h.putheader, args)
    h.endheaders()
    if data is not None:
        h.send(data + '\r\n')
    errcode, errmsg, headers = h.getreply()
    if headers and headers.has_key('set-cookie'):
        cookies = headers.getallmatchingheaders('set-cookie')
        for cookie in cookies:
            self.cookies.load(cookie)
    fp = h.getfile()
    if errcode == 200:
        return addinfourl(fp, headers, "http:" + url)
    else:
        if data is None:
            return self.http_error(url, fp, errcode, errmsg, headers)
        else:
            return self.http_error(url, fp, errcode, errmsg, headers, data)
def retry_using_http_NTLM_auth(self, req, auth_header_field, realm, headers):
    user, pw = self.passwd.find_user_password(realm, req.get_full_url())
    if pw is not None:
        user_parts = user.split('\\', 1)
        if len(user_parts) == 1:
            UserName = user_parts[0]
            DomainName = ''
            type1_flags = ntlm.NTLM_TYPE1_FLAGS & ~ntlm.NTLM_NegotiateOemDomainSupplied
        else:
            DomainName = user_parts[0].upper()
            UserName = user_parts[1]
            type1_flags = ntlm.NTLM_TYPE1_FLAGS
        # ntlm secures a socket, so we must use the same socket for the complete handshake
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        auth = 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(user, type1_flags)
        if req.headers.get(self.auth_header, None) == auth:
            return None
        headers[self.auth_header] = auth
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')
        h = None
        if req.get_full_url().startswith('https://'):
            h = httplib.HTTPSConnection(host)  # will parse host:port
        else:
            h = httplib.HTTPConnection(host)  # will parse host:port
        h.set_debuglevel(self._debuglevel)
        # we must keep the connection because NTLM authenticates the connection, not single requests
        headers["Connection"] = "Keep-Alive"
        headers = dict((name.title(), val) for name, val in headers.items())
        h.request(req.get_method(), req.get_selector(), req.data, headers)
        r = h.getresponse()
        r.begin()
        r._safe_read(int(r.getheader('content-length')))
        if r.getheader('set-cookie'):
            # this is important for some web applications that store authentication-related info in cookies (it took a long time to figure out)
            headers['Cookie'] = r.getheader('set-cookie')
        # remove the reference to the socket, so that it can not be closed by the response object (we want to keep the socket open)
        r.fp = None
        auth_header_value = r.getheader(auth_header_field, None)
        (ServerChallenge, NegotiateFlags) = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
            auth_header_value[5:])
        auth = 'NTLM %s' % ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
            ServerChallenge, UserName, DomainName, pw, NegotiateFlags)
        headers[self.auth_header] = auth
        headers["Connection"] = "Close"
        headers = dict((name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            # none of the configured handlers are triggered, for example redirect-responses are not handled!
            response = h.getresponse()

            def notimplemented():
                raise NotImplementedError

            response.readline = notimplemented
            infourl = addinfourl(response, response.msg, req.get_full_url())
            infourl.code = response.status
            infourl.msg = response.reason
            return infourl
        except socket.error, err:
            raise urllib2.URLError(err)
def test_returns_custom_response_class_when_declared_on_method(self, urlopen):
    resp = addinfourl(StringIO(u"mock_content"), "mock headers",
                      url="http://www.example.com/", code="200")
    urlopen.return_value = resp
    api_stub = CustomResponseClientStub()
    response = api_stub.do_simple()
    self.assertIsInstance(response, apyclient.JSONApiResponse)
    self.assertEqual(resp, response.original_response)
def test_returns_custom_response_class_when_declared_on_api_class(self, urlopen):
    resp = addinfourl(StringIO(u"mock_content"), "mock headers",
                      url="http://www.example.com/", code="200")
    urlopen.return_value = resp
    api_stub = ApiCustomResponseStub()
    response = api_stub.do_something()
    self.assertIsInstance(response, CustomResponseTwo)
    self.assertEqual(resp, response._response)
def resolve(self, uriRef, baseUri=None):
    """
    Takes a URI or a URI reference plus a base URI, produces an absolutized
    URI if a base URI was given, then attempts to obtain access to an entity
    representing the resource identified by the resulting URI, returning the
    entity as a stream (a file-like object).

    Raises an IriError if the URI scheme is unsupported or if a stream
    could not be obtained for any reason.
    """
    if not isinstance(uriRef, urllib.request.Request):
        if baseUri is not None:
            uri = self.absolutize(uriRef, baseUri)
            scheme = get_scheme(uri)
        else:
            uri = uriRef
            scheme = get_scheme(uriRef)
            # since we didn't use absolutize(), we need to verify here
            if scheme not in self._supported_schemes:
                if scheme is None:
                    raise ValueError('When the URI to resolve is a relative '
                                     'reference, it must be accompanied by a base URI.')
                else:
                    raise IriError(IriError.UNSUPPORTED_SCHEME,
                                   scheme=scheme,
                                   resolver=self.__class__.__name__)
        req = urllib.request.Request(uri)
    else:
        req, uri = uriRef, uriRef.get_full_url()
    if self.authorizations and not self.authorize(uri):
        raise IriError(IriError.DENIED_BY_RULE, uri=uri)
    # Bypass urllib for opening local files.
    if scheme == 'file':
        path = uri_to_os_path(uri, attemptAbsolute=False)
        try:
            stream = open(path, 'rb')
        except IOError as e:
            raise IriError(IriError.RESOURCE_ERROR,
                           loc='%s (%s)' % (uri, path),
                           uri=uri, msg=str(e))
        # Add the extra metadata that urllib normally provides (sans
        # the poorly guessed Content-Type header).
        stats = os.stat(path)
        size = stats.st_size
        mtime = _formatdate(stats.st_mtime)
        # email.Message is a module, not a parser, and addinfourl lives in
        # urllib.response on Python 3 -- the original called both incorrectly
        headers = email.message_from_string(
            'Content-Length: %s\nLast-Modified: %s\n' % (size, mtime))
        stream = urllib.response.addinfourl(stream, headers, uri)
    else:
        # urllib.request.urlopen, wrapped by us, will suffice for http,
        # ftp, data and gopher
        try:
            stream = urllib.request.urlopen(req)
        except IOError as e:
            raise IriError(IriError.RESOURCE_ERROR,
                           uri=uri, loc=uri, msg=str(e))
    return stream
def redirect_request(self, req, fp, code, msg, headers, newurl):
    if 'WWW-Authenticate' in headers:
        response = urllib.addinfourl(fp, msg, req.get_full_url())
        response.code = 401
        response.msg = headers
        return self.parent.error('http', req, response, 401, msg, headers)
    else:
        return urllib2.HTTPRedirectHandler.redirect_request(
            self, req, fp, code, msg, headers, newurl)
def http_error_302(self, req, fp, code, msg, headers):
    """Stop request when finding a redirection."""
    infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    infourl.status = code
    infourl.code = code
    return infourl
def http_error_302(self, req, fp, code, msg, headers):
    infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    infourl.status = code
    infourl.code = code
    return infourl
class myHTTPSHandler(M2Crypto.m2urllib2.HTTPSHandler):
    handler_order = 499
    saved_session = None

    def __init__(self, *args, **kwargs):
        self.appname = kwargs.pop('appname', 'generic')
        M2Crypto.m2urllib2.HTTPSHandler.__init__(self, *args, **kwargs)

    # copied from M2Crypto.m2urllib2.HTTPSHandler
    # its sole purpose is to use our myHTTPSConnection/myProxyHTTPSConnection
    # classes (ideally the m2urllib2.HTTPSHandler.https_open() method would be
    # split into "do_open()" and "https_open()" so that we would just need to
    # override the small "https_open()" method...)
    def https_open(self, req):
        host = req.get_host()
        if not host:
            raise M2Crypto.m2urllib2.URLError('no host given: ' + req.get_full_url())

        # Our change: Check to see if we're using a proxy.
        # Then create an appropriate ssl-aware connection.
        full_url = req.get_full_url()
        target_host = urlparse.urlparse(full_url)[1]

        if (target_host != host):
            h = myProxyHTTPSConnection(host=host, appname=self.appname,
                                       ssl_context=self.ctx)
            # M2Crypto.ProxyHTTPSConnection.putrequest expects a fullurl
            selector = full_url
        else:
            h = myHTTPSConnection(host=host, appname=self.appname,
                                  ssl_context=self.ctx)
            selector = req.get_selector()
        # End our change

        h.set_debuglevel(self._debuglevel)
        if self.saved_session:
            h.set_session(self.saved_session)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        try:
            h.request(req.get_method(), selector, req.data, headers)
            s = h.get_session()
            if s:
                self.saved_session = s
            r = h.getresponse()
        except socket.error, err:  # XXX what error?
            err.filename = full_url
            raise M2Crypto.m2urllib2.URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.
        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.
        r.recv = r.read
        fp = socket._fileobject(r)

        resp = urllib.addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
        r.recv = r.read

        # no data, just have to read
        r.read()

        class fp_dummy(object):
            def read(self):
                return ""
            def readline(self):
                return ""
            def close(self):
                pass

        resp = addinfourl(fp_dummy(), r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason

        # Close the connection when the server requests it.
        if fetch.connection_cache is not None:
            if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                fetch.connection_cache.remove_connection(h.host, h.port)

        return resp

class HTTPMethodFallback(urllib2.BaseHandler):
    """
    Fallback to GET if HEAD is not allowed (405 HTTP error)
    """
def http_error_302(self, req, fp, code, msg, headers):
    infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    infourl.status = code
    infourl.code = code
    return infourl

http_error_300 = http_error_302
http_error_301 = http_error_302
http_error_303 = http_error_302
http_error_307 = http_error_302
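# A minimal sketch of wiring a no-redirect handler like the one above into an
# opener. The class name and URL are illustrative, not taken from the snippets.
import urllib
import urllib2

class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
        # return the redirect response itself instead of following it
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl
    http_error_301 = http_error_303 = http_error_307 = http_error_302

opener = urllib2.build_opener(NoRedirectHandler())
resp = opener.open('http://example.com/redirecting-url')
print resp.code, resp.headers.get('Location')  # e.g. 302 and the target URL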
def open_http(self, url, data=None):
    """Use HTTP protocol."""
    import httplib
    user_passwd = None
    if type(url) is type(""):
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = unquote(host)
        realhost = host
    else:
        host, selector = url
        urltype, rest = splittype(selector)
        url = rest
        user_passwd = None
        if string.lower(urltype) != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
        #print "proxy via http:", host, selector
    if not host:
        raise IOError('http error', 'no host given')
    if user_passwd:
        import base64
        auth = string.strip(base64.encodestring(user_passwd))
    else:
        auth = None
    h = httplib.HTTP(host)
    if data is not None:
        h.putrequest('POST', selector)
        h.putheader('Content-type', 'application/x-www-form-urlencoded')
        h.putheader('Content-length', '%d' % len(data))
    else:
        h.putrequest('GET', selector)
    for cookie in self.cookies.items():
        h.putheader('Cookie', '%s=%s;' % cookie)
    if auth:
        h.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        h.putheader('Host', realhost)
    for args in self.addheaders:
        apply(h.putheader, args)
    h.endheaders()
    if data is not None:
        h.send(data + '\r\n')
    errcode, errmsg, headers = h.getreply()
    if headers and headers.has_key('set-cookie'):
        cookies = headers.getallmatchingheaders('set-cookie')
        for cookie in cookies:
            self.cookies.load(cookie)
    fp = h.getfile()
    if errcode == 200:
        return addinfourl(fp, headers, "http:" + url)
    else:
        if data is None:
            return self.http_error(url, fp, errcode, errmsg, headers)
        else:
            return self.http_error(url, fp, errcode, errmsg, headers, data)
class AbstractHTTPHandler(BaseHandler):

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        host = request.get_host()
        if not host:
            raise URLError('no host given')

        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))

        scheme, sel = splittype(request.get_selector())
        sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        h = http_class(host)  # will parse host:port
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error, err:  # XXX what error?
            raise URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.
        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.
        r.recv = r.read
        fp = socket._fileobject(r, close=True)

        resp = addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
def rtmp_open(self, req):
    url = req.get_selector()
    return urllib.addinfourl(StringIO(''), req.headers, req.get_full_url())
class FTPChunkHandler(FTPHandler):
    """The code was taken from urllib2.py. The only difference is that
    offsets are supported by this class using the REST command.
    Offsets are needed for chunked loading.
    """

    def ftp_open(self, req):
        import mimetypes
        host = req.get_host()
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                        value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            # EDIT START
            # get REST (file offset) from headers
            rest = 0
            offset = req.headers.get('Offset', None)
            if offset is not None and offset > 0:
                rest = offset
            # EDIT END

            fp, retrlen = fw.retrfile(file, type, rest)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            raise URLError, ('ftp error: %s' % msg), sys.exc_info()[2]
            else:
                raise CrawlerFatalError('Maximum retry limit reached: %r' % url)
            # read the page at once (for some reason specifying read(n) causes errors.)
            buf = resp.read()
            if MAX_PAGE_LEN < len(buf):
                raise CrawlerPageError('Too long page (>%dbytes): %r' % (MAX_PAGE_LEN, url))
            # interpret the encoding.
            if 'gzip' in resp.getheader('Content-Encoding', '').lower():
                fp = GzipFile(fileobj=StringIO(buf))
            else:
                fp = StringIO(buf)
            # get cookie received.
            if self.cookiejar:
                r = addinfourl(fp, resp.msg, url)
                r.code = resp.status
                self.cookiejar.extract_cookies(r, req)
            # check the result code.
            status = resp.status
            if status in (301, 302):
                url0 = urljoin(url, resp.getheader('Location', ''))
                url1 = self.accept_url(url0)
                if url1 and (url1 not in self.crawled or self.crawled[url1] != 2):
                    print >> stderr, 'REDIRECTED: Status=%d: %r' % (status, url1)
                    url = url1
                    continue
                else:
                    raise CrawlerWarning('Status=%d: Ignore redirect: %r' %
def mock_response(url, response_text, headers=DEFAULT_HEADERS):
    all_headers = headers + [("Content-length", len(response_text))]
    headers_obj = _headers(all_headers)
    return addinfourl(StringIO(response_text), headers_obj, url)
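# Hedged usage sketch: pairing a helper like mock_response() with mock.patch
# so client code under test receives a canned addinfourl. `mock` is the
# Python 2 backport package; the URL and payload are illustrative, and
# mock_response (with its _headers/DEFAULT_HEADERS dependencies) is assumed
# to be importable from the surrounding test module.
import json
import urllib2
import mock

with mock.patch('urllib2.urlopen') as fake_urlopen:
    fake_urlopen.return_value = mock_response('http://example.com/api',
                                              '{"ok": true}')
    resp = urllib2.urlopen('http://example.com/api')
    assert json.loads(resp.read())['ok'] is True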
class newHTTPHandler(urllib2.BaseHandler):

    def http_open(self, req):
        return self.do_open(httplib.HTTP, req)

    def do_open(self, http_class, req):
        data = req.get_data()
        v_files = []
        v_vars = []
        # mapping object (dict)
        if req.has_data() and type(data) != str:
            if hasattr(data, 'items'):
                data = data.items()
            else:
                try:
                    if len(data) and not isinstance(data[0], tuple):
                        raise TypeError
                except TypeError:
                    ty, va, tb = sys.exc_info()
                    raise TypeError, "not a valid non-string sequence or mapping object", tb
            for (k, v) in data:
                if hasattr(v, 'read'):
                    v_files.append((k, v))
                else:
                    v_vars.append((k, v))
        # no file ? convert to string
        if len(v_vars) > 0 and len(v_files) == 0:
            data = urllib.urlencode(v_vars)
            v_files = []
            v_vars = []
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')
        h = http_class(host)  # will parse host:port
        if req.has_data():
            h.putrequest('POST', req.get_selector())
            if not 'Content-type' in req.headers:
                if len(v_files) > 0:
                    boundary = mimetools.choose_boundary()
                    l = send_data(v_vars, v_files, boundary)
                    h.putheader('Content-Type',
                                'multipart/form-data; boundary=%s' % boundary)
                    h.putheader('Content-length', str(l))
                else:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                    if not 'Content-length' in req.headers:
                        h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', req.get_selector())
        scheme, sel = urllib.splittype(req.get_selector())
        sel_host, sel_path = urllib.splithost(sel)
        h.putheader('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if name not in req.headers:
                h.putheader(name, value)
        for k, v in req.headers.items():
            h.putheader(k, v)
        # httplib will attempt to connect() here.  be prepared
        # to convert a socket error to a URLError.
        try:
            h.endheaders()
        except socket.error, err:
            raise urllib2.URLError(err)
        if req.has_data():
            if len(v_files) > 0:
                l = send_data(v_vars, v_files, boundary, h)
            elif len(v_vars) > 0:
                # if data is passed as dict ...
                data = urllib.urlencode(v_vars)
                h.send(data)
            else:
                # "normal" urllib2.urlopen()
                h.send(data)
        code, msg, hdrs = h.getreply()
        fp = h.getfile()
        if code == 200:
            resp = urllib.addinfourl(fp, hdrs, req.get_full_url())
            resp.code = code
            resp.msg = msg
            return resp
        else:
            return self.parent.error('http', req, fp, code, msg, hdrs)
class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):

    def __init__(self, ui, pwmgr):
        global _configuredlogging
        urllib2.AbstractHTTPHandler.__init__(self)
        self.ui = ui
        self.pwmgr = pwmgr
        self._connections = {}
        loglevel = ui.config('ui', 'http2debuglevel', default=None)
        if loglevel and not _configuredlogging:
            _configuredlogging = True
            logger = logging.getLogger('mercurial.httpclient')
            logger.setLevel(getattr(logging, loglevel.upper()))
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(LOGFMT))
            logger.addHandler(handler)

    def close_all(self):
        """Close and remove all connection objects being kept for reuse."""
        for openconns in self._connections.values():
            for conn in openconns:
                conn.close()
        self._connections = {}

    # shamelessly borrowed from urllib2.AbstractHTTPHandler
    def do_open(self, http_class, req, use_ssl):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        # If using a proxy, the host returned by get_host() is
        # actually the proxy. On Python 2.6.1, the real destination
        # hostname is encoded in the URI in the urllib2 request
        # object. On Python 2.6.5, it's stored in the _tunnel_host
        # attribute which has no accessor.
        tunhost = getattr(req, '_tunnel_host', None)
        host = req.get_host()
        if tunhost:
            proxyhost = host
            host = tunhost
        elif req.has_proxy():
            proxyhost = req.get_host()
            host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
        else:
            proxyhost = None

        if proxyhost:
            if ':' in proxyhost:
                # Note: this means we'll explode if we try and use an
                # IPv6 http proxy. This isn't a regression, so we
                # won't worry about it for now.
                proxyhost, proxyport = proxyhost.rsplit(':', 1)
            else:
                proxyport = 3128  # squid default
            proxy = (proxyhost, proxyport)
        else:
            proxy = None

        if not host:
            raise urllib2.URLError('no host given')

        connkey = use_ssl, host, proxy
        allconns = self._connections.get(connkey, [])
        conns = [c for c in allconns if not c.busy()]
        if conns:
            h = conns[0]
        else:
            if allconns:
                self.ui.debug('all connections for %s busy, making a new '
                              'one\n' % host)
            timeout = None
            if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                timeout = req.timeout
            h = http_class(host, timeout=timeout, proxy_hostport=proxy)
            self._connections.setdefault(connkey, []).append(h)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            path = req.get_selector()
            if '://' in path:
                path = path.split('://', 1)[1].split('/', 1)[1]
            if path[0] != '/':
                path = '/' + path
            h.request(req.get_method(), path, req.data, headers)
            r = h.getresponse()
        except socket.error, err:  # XXX what error?
            raise urllib2.URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.
        r.recv = r.read

        resp = urllib.addinfourl(r, r.headers, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
def get_response(self):
    """Returns a copy of the current response."""
    return urllib.addinfourl(StringIO(self.data),
                             self._response.info(),
                             self._response.geturl())
def http_error_300(self, req, fp, code, msg, header_list):
    data = urllib.addinfourl(fp, header_list, req.get_full_url())
    data.status = code
    data.code = code
    return data
def http_error_default(self, url, fp, errcode, errmsg, headers):
    return urllib.addinfourl(fp, [headers, errcode], "http:" + url)
def smb_open(self, req):
    global USE_NTLM, MACHINE_NAME

    host = req.get_host()
    if not host:
        raise urllib2.URLError('SMB error: no host given')
    host, port = splitport(host)
    if port is None:
        port = 139
    else:
        port = int(port)

    # username/password handling
    user, host = splituser(host)
    if user:
        user, passwd = splitpasswd(user)
    else:
        passwd = None
    host = unquote(host)
    user = user or ''
    passwd = passwd or ''
    myname = MACHINE_NAME or self.generateClientMachineName()

    n = NetBIOS()
    names = n.queryIPForName(host)
    if names:
        server_name = names[0]
    else:
        raise urllib2.URLError(
            'SMB error: Hostname does not reply back with its machine name')

    path, attrs = splitattr(req.get_selector())
    if path.startswith('/'):
        path = path[1:]
    dirs = path.split('/')
    dirs = map(unquote, dirs)
    service, path = dirs[0], '/'.join(dirs[1:])

    try:
        conn = SMBConnection(user, passwd, myname, server_name,
                             use_ntlm_v2=USE_NTLM)
        conn.connect(host, port)

        if req.has_data():
            data_fp = req.get_data()
            filelen = conn.storeFile(service, path, data_fp)

            headers = "Content-length: 0\n"
            fp = StringIO("")
        else:
            fp = self.createTempFile()
            file_attrs, retrlen = conn.retrieveFile(service, path, fp)
            fp.seek(0)

            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen

        sf = StringIO(headers)
        headers = mimetools.Message(sf)

        return addinfourl(fp, headers, req.get_full_url())
    except Exception, ex:
        raise urllib2.URLError, ('smb error: %s' % ex), sys.exc_info()[2]
def http_error_416(self, req, fp, code, msg, hdrs):
    # HTTP's Range Not Satisfiable error
    r = urllib.addinfourl(fp, hdrs, req.get_full_url())
    r.code = code
    r.msg = msg
    return r
def _mocked(url, *args, **kwargs):
    response_data, headers = response_method(url)
    return addinfourl(WhateverIO(response_data), headers, url)
def retry_using_http_NTLM_auth(self, req, auth_header_field, realm, headers):
    user, pw = self.passwd.find_user_password(realm, req.get_full_url())
    if pw is not None:
        user_parts = user.split('\\', 1)
        if len(user_parts) == 1:
            UserName = user_parts[0]
            DomainName = ''
            type1_flags = ntlm.NTLM_TYPE1_FLAGS & ~ntlm.NTLM_NegotiateOemDomainSupplied
        else:
            DomainName = user_parts[0].upper()
            UserName = user_parts[1]
            type1_flags = ntlm.NTLM_TYPE1_FLAGS
        # ntlm secures a socket, so we must use the same socket for the complete handshake
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        auth = 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(
            user, type1_flags)
        if req.headers.get(self.auth_header, None) == auth:
            return None
        headers[self.auth_header] = auth
        host = req.host
        if not host:
            raise urllib2.URLError('no host given')
        h = None
        if req.get_full_url().startswith('https://'):
            h = HTTPSConnection(host)  # will parse host:port
        else:
            h = HTTPConnection(host)  # will parse host:port
        h.set_debuglevel(self._debuglevel)
        # we must keep the connection because NTLM authenticates the connection, not single requests
        headers["Connection"] = "Keep-Alive"
        headers = dict(
            (name.title(), val) for name, val in headers.items())

        # For some reason, six doesn't do this translation correctly
        # TODO rsanders low - find bug in six & fix it
        try:
            selector = req.selector
        except AttributeError:
            selector = req.get_selector()

        h.request(req.get_method(), selector, req.data, headers)
        r = h.getresponse()
        r.begin()
        r._safe_read(int(r.getheader('content-length')))
        if r.getheader('set-cookie'):
            # this is important for some web applications that store authentication-related info in cookies (it took a long time to figure out)
            headers['Cookie'] = r.getheader('set-cookie')
        # remove the reference to the socket, so that it can not be closed by the response object (we want to keep the socket open)
        r.fp = None
        auth_header_value = r.getheader(auth_header_field, None)

        # some Exchange servers send two WWW-Authenticate headers, one with the NTLM challenge
        # and another with the 'Negotiate' keyword - make sure we operate on the right one
        m = re.match(r'(NTLM [A-Za-z0-9+\-/=]+)', auth_header_value)
        if m:
            auth_header_value, = m.groups()

        (ServerChallenge, NegotiateFlags) = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
            auth_header_value[5:])
        auth = 'NTLM %s' % ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
            ServerChallenge, UserName, DomainName, pw, NegotiateFlags)
        headers[self.auth_header] = auth
        headers["Connection"] = "Close"
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), selector, req.data, headers)
            # none of the configured handlers are triggered, for example redirect-responses are not handled!
            response = h.getresponse()

            def notimplemented():
                raise NotImplementedError

            response.readline = notimplemented
            infourl = urllib.addinfourl(response, response.msg,
                                        req.get_full_url())
            infourl.code = response.status
            infourl.msg = response.reason
            return infourl
        except socket.error as err:
            raise urllib2.URLError(err)
    else:
        return None
class FTPRangeHandler(urllib2.FTPHandler):

    def ftp_open(self, req):
        host = req.get_host()
        if not host:
            raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise urllib2.URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs)
            type = file and 'I' or 'D'
            for attr in attrs:
                # was splitattr(attr), which returns (attr, [attr-list]) and
                # can never match 'type'; splitvalue() yields the key/value pair
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                        value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            # -- range support modifications start here
            rest = None
            range_tup = range_header_to_tuple(req.headers.get('Range', None))
            assert range_tup != ()
            if range_tup:
                (fb, lb) = range_tup
                if fb > 0:
                    rest = fb
            # -- range support modifications end here

            fp, retrlen = fw.retrfile(file, type, rest)

            # -- range support modifications start here
            if range_tup:
                (fb, lb) = range_tup
                if lb == '':
                    if retrlen is None or retrlen == 0:
                        raise RangeError(9, 'Requested Range Not Satisfiable '
                                            'due to unobtainable file length.')
                    lb = retrlen
                    retrlen = lb - fb
                    if retrlen < 0:
                        # beginning of range is larger than file
                        raise RangeError(9, 'Requested Range Not Satisfiable')
                else:
                    retrlen = lb - fb
                fp = RangeableFileObject(fp, (0, retrlen))
            # -- range support modifications end here

            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
def http_error_206(self, req, fp, code, msg, hdrs):
    # 206 Partial Content Response
    r = urllib.addinfourl(fp, hdrs, req.get_full_url())
    r.code = code
    r.msg = msg
    return r
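# Hedged sketch: combining 206/416 handlers like the ones above with a Range
# request. HTTPRangeHandler is assumed to be a urllib2.BaseHandler defining
# the http_error_206/http_error_416 methods shown here (as urlgrabber does);
# the URL and byte range are illustrative.
import urllib2

opener = urllib2.build_opener(HTTPRangeHandler())
req = urllib2.Request('http://example.com/big.bin')
req.add_header('Range', 'bytes=0-1023')
first_kib = opener.open(req).read()  # first 1024 bytes, if the server honors Range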
def do_open(self, http_class, req):
    # Large portions from Python 3.3 Lib/urllib/request.py and
    # Python 2.6 Lib/urllib2.py

    if sys.version_info >= (3,):
        host = req.host
    else:
        host = req.get_host()
    if not host:
        raise URLError('no host given')

    if self.connection and self.connection.host != host:
        self.close()

    # Re-use the connection if possible
    self.use_count += 1
    if not self.connection:
        h = http_class(host, timeout=req.timeout)
    else:
        h = self.connection
        if self._debuglevel == 5:
            console_write(
                u'''
                Urllib %s Debug General
                  Re-using connection to %s on port %s for request #%s
                ''',
                (h._debug_protocol, h.host, h.port, self.use_count)
            )

    if sys.version_info >= (3,):
        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))
        headers = dict((name.title(), val) for name, val in headers.items())
    else:
        h.set_debuglevel(self._debuglevel)
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        headers = dict(
            (name.title(), val) for name, val in headers.items())

    if req._tunnel_host and not self.connection:
        tunnel_headers = {}
        proxy_auth_hdr = "Proxy-Authorization"
        if proxy_auth_hdr in headers:
            tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
            del headers[proxy_auth_hdr]

        if sys.version_info >= (3,):
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
        else:
            h._set_tunnel(req._tunnel_host, headers=tunnel_headers)

    try:
        if sys.version_info >= (3,):
            h.request(req.get_method(), req.selector, req.data, headers)
        else:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
    except socket.error as err:  # timeout error
        h.close()
        raise URLError(err)
    else:
        r = h.getresponse()

    # Keep the connection around for re-use
    if r.is_keep_alive():
        self.connection = h
    else:
        if self._debuglevel == 5:
            s = '' if self.use_count == 1 else 's'
            console_write(
                u'''
                Urllib %s Debug General
                  Closing connection to %s on port %s after %s request%s
                ''',
                (h._debug_protocol, h.host, h.port, self.use_count, s)
            )
        self.use_count = 0
        self.connection = None

    if sys.version_info >= (3,):
        r.url = req.get_full_url()
        r.msg = r.reason
        return r

    r.recv = r.read
    fp = socket._fileobject(r, close=True)

    resp = addinfourl(fp, r.msg, req.get_full_url())
    resp.code = r.status
    resp.msg = r.reason
    return resp
def http_error_206(self, request, fp, errcode, msg, headers):
    return urllib.addinfourl(fp, headers, request.get_full_url())
def resolve(self, uriRef, baseUri=None):
    """
    Takes a URI or a URI reference plus a base URI, produces an absolutized
    URI if a base URI was given, then attempts to obtain access to an entity
    representing the resource identified by the resulting URI, returning the
    entity as a stream (a file-like object).

    Raises an IriError if the URI scheme is unsupported or if a stream
    could not be obtained for any reason.
    """
    if not isinstance(uriRef, urllib.request.Request):
        if baseUri is not None:
            uri = self.absolutize(uriRef, baseUri)
            scheme = get_scheme(uri)
        else:
            uri = uriRef
            scheme = get_scheme(uriRef)
            # since we didn't use absolutize(), we need to verify here
            if scheme not in self._supported_schemes:
                if scheme is None:
                    raise ValueError(
                        'When the URI to resolve is a relative '
                        'reference, it must be accompanied by a base URI.')
                else:
                    raise IriError(IriError.UNSUPPORTED_SCHEME,
                                   scheme=scheme,
                                   resolver=self.__class__.__name__)
        req = urllib.request.Request(uri)
    else:
        req, uri = uriRef, uriRef.get_full_url()
    if self.authorizations and not self.authorize(uri):
        raise IriError(IriError.DENIED_BY_RULE, uri=uri)
    # Bypass urllib for opening local files.
    if scheme == 'file':
        path = uri_to_os_path(uri, attemptAbsolute=False)
        try:
            stream = open(path, 'rb')
        except IOError as e:
            raise IriError(IriError.RESOURCE_ERROR,
                           loc='%s (%s)' % (uri, path),
                           uri=uri, msg=str(e))
        # Add the extra metadata that urllib normally provides (sans
        # the poorly guessed Content-Type header).
        stats = os.stat(path)
        size = stats.st_size
        mtime = _formatdate(stats.st_mtime)
        # email.Message is a module, not a parser, and addinfourl lives in
        # urllib.response on Python 3 -- the original called both incorrectly
        headers = email.message_from_string(
            'Content-Length: %s\nLast-Modified: %s\n' % (size, mtime))
        stream = urllib.response.addinfourl(stream, headers, uri)
    else:
        # urllib.request.urlopen, wrapped by us, will suffice for http,
        # ftp, data and gopher
        try:
            stream = urllib.request.urlopen(req)
        except IOError as e:
            raise IriError(IriError.RESOURCE_ERROR,
                           uri=uri, loc=uri, msg=str(e))
    return stream
def add_response(self, url, status_code, headers, body=None):
    response = addinfourl(StringIO(body or ''), headers, url, status_code)
    responses = list(self.http_mock.side_effect)
    responses.append(response)
    self.http_mock.side_effect = responses
def retry_using_http_NTLM_auth(self, req, auth_header_field, realm, headers):
    user, pw = self.passwd.find_user_password(realm, req.get_full_url())
    if pw is not None:
        # ntlm secures a socket, so we must use the same socket for the complete handshake
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        auth = 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(user)
        if req.headers.get(self.auth_header, None) == auth:
            return None
        headers[self.auth_header] = auth
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')
        h = None
        if req.get_full_url().startswith('https://'):
            h = httplib.HTTPSConnection(host)  # will parse host:port
        else:
            h = httplib.HTTPConnection(host)  # will parse host:port
        h.set_debuglevel(self._debuglevel)
        # we must keep the connection because NTLM authenticates the connection, not single requests
        headers["Connection"] = "Keep-Alive"
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        h.request(req.get_method(), req.get_selector(), req.data, headers)
        r = h.getresponse()
        r.begin()
        r._safe_read(int(r.getheader('content-length')))
        if r.getheader('set-cookie'):
            # this is important for some web applications that store authentication-related info in cookies (it took a long time to figure out)
            headers['Cookie'] = r.getheader('set-cookie')
        # remove the reference to the socket, so that it can not be closed by the response object (we want to keep the socket open)
        r.fp = None
        auth_header_value = r.getheader(auth_header_field, None)
        (ServerChallenge, NegotiateFlags) = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
            auth_header_value[5:])
        user_parts = user.split('\\', 1)
        DomainName = user_parts[0].upper()
        UserName = user_parts[1]
        auth = 'NTLM %s' % ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
            ServerChallenge, UserName, DomainName, pw, NegotiateFlags)
        headers[self.auth_header] = auth
        headers["Connection"] = "Close"
        headers = dict(
            (name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            # none of the configured handlers are triggered, for example redirect-responses are not handled!
            response = h.getresponse()

            def notimplemented():
                raise NotImplementedError

            response.readline = notimplemented
            infourl = addinfourl(response, response.msg, req.get_full_url())
            infourl.code = response.status
            infourl.msg = response.reason
            return infourl
        except socket.error, err:
            raise urllib2.URLError(err)
def http_error_206(self, url, fp, errcode, errmsg, headers, data=None):
    # The next line is taken from urllib's URLopener.open_http
    # method, at the end after the line "if errcode == 200:"
    return urllib.addinfourl(fp, headers, 'http:' + url)
def http_error_default(self, req, fp, code, msg, hdrs):
    infourl = addinfourl(fp, hdrs, req.get_full_url())
    infourl.status = code
    infourl.code = code
    return infourl
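# Hedged sketch: an error handler like the one above can be installed so that
# HTTP errors come back as ordinary responses instead of raising HTTPError.
# The class name and URL are illustrative; handler_order must sort before
# urllib2.HTTPDefaultErrorHandler (default order 500) for this to take effect.
import urllib2
from urllib import addinfourl

class DontRaiseHandler(urllib2.BaseHandler):
    handler_order = 100  # run before HTTPDefaultErrorHandler

    def http_error_default(self, req, fp, code, msg, hdrs):
        infourl = addinfourl(fp, hdrs, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl

opener = urllib2.build_opener(DontRaiseHandler())
resp = opener.open('http://example.com/missing')  # a 404 is returned, not raised
print resp.code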
"""An extensible library for opening URLs using a variety of protocols