class CookieHandler(object):
    """Keep a cookie jar that persists across consecutive requests."""

    def __init__(self, *args, **kw):
        # The jar is created before delegating so that cooperating base
        # classes can already rely on ``self.cookies`` in their __init__.
        self.cookies = SimpleCookie()
        super(CookieHandler, self).__init__(*args, **kw)

    def httpCookie(self, path):
        """Return self.cookies as an HTTP_COOKIE environment value.

        Only cookies whose ``path`` attribute is a prefix of ``path`` are
        included; each one contributes just its ``name=value`` part.
        """
        pairs = []
        for morsel in self.cookies.values():
            if not path.startswith(morsel['path']):
                continue
            # OutputString() is "name=value; Attr=...": keep the first field.
            pairs.append(morsel.OutputString().split(';')[0])
        return '; '.join(pairs)

    def loadCookies(self, envstring):
        """Parse a cookie header string into the jar."""
        self.cookies.load(envstring)

    def saveCookies(self, response):
        """Save cookies from the response."""
        # Urgh - need to play with the response's privates to extract
        # cookies that have been set
        # TODO: extend the IHTTPRequest interface to allow access to all
        # cookies
        # TODO: handle cookie expirations
        native_str_is_bytes = bytes is str  # True only on Python 2
        for name, attrs in response._cookies.items():
            key = name.encode('utf8') if native_str_is_bytes else name
            value = attrs['value']
            if native_str_is_bytes:
                value = value.encode('utf8')
            self.cookies[key] = value
            if 'path' in attrs:
                self.cookies[key]['path'] = attrs['path']
def visit(url, request):
    """Answer a visit request, asking for an agent-login discharge if needed.

    * A request with ``Accept: application/json`` gets a 200 response whose
      content echoes ``request.url`` under the ``agent`` key.
    * Otherwise the public key is read from the ``agent-login`` cookie (if
      present).  When the request carries no macaroons, a 401
      discharge-required response is built and passed through the first
      ``response`` hook of the request; when it does, a 200 response with
      ``{'agent-login': True}`` is returned.

    ``url`` is accepted but not used.
    """
    if request.headers.get('Accept') == 'application/json':
        return {'status_code': 200, 'content': {'agent': request.url}}

    jar = SimpleCookie()
    cookie_header = request.headers.get('Cookie')
    if cookie_header is not None:
        jar.load(str(cookie_header))

    public_key = None
    if 'agent-login' in jar:
        # The cookie value is base64-encoded JSON.
        decoded = base64.b64decode(jar['agent-login'].value).decode('utf-8')
        login_info = json.loads(decoded)
        public_key = bakery.PublicKey.deserialize(
            login_info.get('public_key'))

    macaroons = httpbakery.extract_macaroons(request.headers)
    if len(macaroons) > 0:
        return {'status_code': 200, 'content': {'agent-login': True}}

    # No macaroons presented: mint one that requires a local third-party
    # discharge bound to the agent's public key.
    login_caveat = bakery.local_third_party_caveat(
        public_key,
        version=httpbakery.request_version(request.headers))
    minter = bakery.Bakery(key=discharge_key)
    macaroon = minter.oven.macaroon(
        version=bakery.LATEST_VERSION,
        expiry=datetime.utcnow() + timedelta(days=1),
        caveats=[login_caveat],
        ops=[bakery.Op(entity='agent', action='login')])
    content, headers = httpbakery.discharge_required_response(
        macaroon, '/', 'test', 'message')
    resp = response(status_code=401, content=content, headers=headers)
    return request.hooks['response'][0](resp)
def extract_macaroons(headers):
    ''' Returns an array of any macaroons found in the given headers.

    Macaroons are read from every cookie whose name starts with
    ``macaroon-`` and from each comma-separated entry of the ``Macaroons``
    header.  Each source is base64-encoded JSON holding a list of macaroons.

    @param headers: dict of headers
    @return: An array of array of mpy macaroons
    '''
    collected = []

    def _collect(encoded):
        # base64 -> JSON list of dicts -> list of macaroons
        decoded = utils.b64decode(encoded)
        dicts = json.loads(decoded.decode('utf-8'))
        collected.append([utils.macaroon_from_dict(d) for d in dicts])

    cookie_header = headers.get('Cookie')
    if cookie_header is not None:
        jar = SimpleCookie()
        # The cookie might be a unicode object, so convert it
        # to ASCII. This may cause an exception under Python 2.
        # TODO is that a problem?
        jar.load(str(cookie_header))
        for name, morsel in jar.items():
            if name.startswith('macaroon-'):
                _collect(morsel.value)

    # Python doesn't make it easy to have multiple values for a
    # key, so split the header instead, which is necessary
    # for HTTP1.1 compatibility anyway.
    macaroon_header = headers.get('Macaroons')
    if macaroon_header is not None:
        for entry in macaroon_header.split(','):
            _collect(entry)
    return collected
def _wrap(response):
    """Return the session cookie value found in the response, else None.

    The ``Set-Cookie`` header of ``response`` is parsed and the value of the
    cookie named by the ``session.cookie.name`` pyramid option is returned.
    """
    jar = SimpleCookie()
    jar.load(response.headers['Set-Cookie'])
    # The option is looked up lazily, per cookie, exactly as a plain loop
    # would do: nothing is read from the environment for an empty jar.
    session_values = (
        morsel.value
        for name, morsel in jar.items()
        if name == ngw_env.pyramid.options['session.cookie.name']
    )
    return next(session_values, None)
def _prepare_response(self, res_info, url):
    """Build a ``requests.Response`` from a response-description dict.

    Keys of ``res_info`` read here: ``url`` (defaults to the ``url``
    argument), ``status_code`` (defaults to 200), ``reason_phrase``
    (defaults to the ``REASON_PHRASES`` entry for the status code, or
    ``'UNKNOWN STATUS CODE'``), ``reason``, ``headers``, ``cookies`` and
    ``content`` (defaults to ``''``).

    Cookies are taken from the ``Set-Cookie`` header only when no explicit
    ``cookies`` entry is supplied.

    :param res_info: mapping describing the response to fabricate.
    :param url: fallback URL when ``res_info`` has none.
    :return: the populated ``requests.Response``.
    """
    import re  # local import: only needed to split a folded Set-Cookie

    res = requests.Response()
    res.url = res_info.get('url', url)
    res.status_code = res_info.get('status_code', 200)
    res.reason_phrase = res_info.get(
        'reason_phrase',
        REASON_PHRASES.get(res.status_code, 'UNKNOWN STATUS CODE'))
    if 'reason' in res_info:
        res.reason = res_info['reason']

    if 'headers' in res_info:
        headers = res_info['headers']
        res.headers.update(headers)
        if 'Set-Cookie' in headers and 'cookies' not in res_info:
            cookies = SimpleCookie()
            # Several cookies may be folded into one header, separated by
            # commas.  An ``expires`` date contains a comma too
            # ("Thu, 01-Jan-1970 ..."), so split only on a comma that is
            # followed by the next ``name=`` pair instead of on every comma.
            for entry in re.split(r',(?=\s*[^;,=\s]+=)',
                                  headers['Set-Cookie']):
                cookies.load(str(entry))
            res.cookies.update(cookies)

    if 'cookies' in res_info:
        res.cookies.update(res_info['cookies'])
    res.raw = StreamContent(res_info.get('content', ''))
    return res
def _prepare_response(self, res_info, url):
    """Turn a response-description dict into a ``requests.Response``.

    ``res_info`` may provide ``url``, ``status_code``, ``reason_phrase``,
    ``reason``, ``headers``, ``cookies`` and ``content``.  Missing values
    fall back to the ``url`` argument, 200, the matching
    ``REASON_PHRASES`` entry (or ``'UNKNOWN STATUS CODE'``) and ``''`` for
    the content.

    A ``Set-Cookie`` header populates the response cookies only when
    ``res_info`` has no ``cookies`` entry of its own.

    :param res_info: mapping describing the response to fabricate.
    :param url: URL used when ``res_info`` does not carry one.
    :return: the populated ``requests.Response``.
    """
    import re  # local import: used only for the Set-Cookie split below

    res = requests.Response()
    res.url = res_info.get('url', url)
    res.status_code = res_info.get('status_code', 200)
    default_phrase = REASON_PHRASES.get(res.status_code,
                                        'UNKNOWN STATUS CODE')
    res.reason_phrase = res_info.get('reason_phrase', default_phrase)
    if 'reason' in res_info:
        res.reason = res_info['reason']

    if 'headers' in res_info:
        res.headers.update(res_info['headers'])
        set_cookie = res_info['headers'].get('Set-Cookie') \
            if 'Set-Cookie' in res_info['headers'] else None
        if set_cookie is not None and 'cookies' not in res_info:
            cookies = SimpleCookie()
            # Splitting on every comma would cut an ``expires`` date such
            # as "Thu, 01-Jan-1970 00:00:00 GMT" in half.  Split only where
            # the comma is followed by another ``name=`` pair.
            separator = re.compile(r',(?=\s*[^;,=\s]+=)')
            for entry in separator.split(set_cookie):
                cookies.load(str(entry))
            res.cookies.update(cookies)

    if 'cookies' in res_info:
        res.cookies.update(res_info['cookies'])
    res.raw = StreamContent(res_info.get('content', ''))
    return res
def _assert_cookies_expired(self, http_headers):
    """Assert that both provider cookies carry an expiry in the past.

    ``http_headers`` is a sequence of ``(name, value)`` pairs; every
    ``Set-Cookie`` value is parsed, and the ``expires`` attribute of the
    provider's ``cookie_name`` and ``session_cookie_name`` cookies must be
    earlier than the current UTC time.
    """
    set_cookie_values = []
    for header in http_headers:
        if header[0] == "Set-Cookie":
            set_cookie_values.append(header[1])

    all_cookies = SimpleCookie()
    all_cookies.load(";".join(set_cookie_values))

    now = datetime.datetime.utcnow()
    expires_format = "%a, %d-%b-%Y %H:%M:%S GMT"
    names = [self.provider.cookie_name, self.provider.session_cookie_name]
    for name in names:
        expires_at = datetime.datetime.strptime(
            all_cookies[name]["expires"], expires_format)
        # make sure the cookies have expired to be cleared
        assert expires_at < now
def _assert_cookies_expired(self, http_headers):
    """Assert that both provider cookies carry an expiry in the past.

    :param http_headers: sequence of ``(name, value)`` header pairs; every
        ``Set-Cookie`` value is parsed.
    :raises AssertionError: if the ``expires`` attribute of the provider's
        ``cookie_name`` or ``session_cookie_name`` cookie is not earlier
        than the current time.
    :raises KeyError: if one of the two cookies is absent from the headers.
    """
    cookies_string = ";".join(
        value for name, value in
        ((header[0], header[1]) for header in http_headers)
        if name == "Set-Cookie")
    all_cookies = SimpleCookie()
    all_cookies.load(cookies_string)

    # The ``expires`` attribute is expressed in GMT, so it has to be
    # compared with the current UTC time; local time would shift the check
    # by the machine's UTC offset.
    now = datetime.datetime.utcnow()
    for c in [self.provider.cookie_name, self.provider.session_cookie_name]:
        dt = datetime.datetime.strptime(all_cookies[c]["expires"],
                                        "%a, %d-%b-%Y %H:%M:%S GMT")
        assert dt < now  # make sure the cookies have expired to be cleared
def cookies(self):
    """Return a SimpleCookie holding only the ``galaxy*`` request cookies.

    The ``HTTP_COOKIE`` entry of ``self.environ`` is split on ``'; '`` and
    only the parts starting with ``galaxy`` are parsed.  A header that
    cannot be parsed leaves the jar as it is.
    """
    jar = SimpleCookie()
    header = self.environ.get("HTTP_COOKIE")
    if not header:
        return jar

    kept = [part.strip()
            for part in header.split('; ')
            if part.startswith('galaxy')]
    galaxy_header = "; ".join(kept)
    if not galaxy_header:
        return jar

    try:
        jar.load(galaxy_header)
    except CookieError:
        # Malformed cookies are deliberately ignored.
        pass
    return jar
def _handle_cookies(self, response):
    # type: (httplib.HTTPResponse) -> None
    """
    Parse cookies from |HTTP| response and store for next request.

    Every cookie of the ``set-cookie`` header is merged into
    ``self.cookies`` as a ``name -> value`` entry.

    :param httplib.HTTPResponse: The |HTTP| response.
    """
    # FIXME: this cookie handling doesn't respect path, domain and expiry
    jar = SimpleCookie()
    jar.load(response.getheader('set-cookie', ''))

    parsed = {}
    for morsel in jar.values():
        parsed[morsel.key] = morsel.value
    self.cookies.update(parsed)
class BaseHeaders(CaseInsensitiveMapping):
    """Represent the headers in an HTTP Request or Response message.

    `How to send non-English unicode string using HTTP header?
    <http://stackoverflow.com/q/5423223/>`_ and
    `What character encoding should I use for a HTTP header?
    <http://stackoverflow.com/q/4400678/>`_
    have good notes on why we do everything as pure bytes here.
    """

    def __init__(self, headers=()):
        """Takes headers as a dict, or list of items.

        The ``Cookie`` header, if any, is also parsed into ``self.cookie``.
        """
        CaseInsensitiveMapping.__init__(self, headers)

        # Cookie
        # ======
        self.cookie = SimpleCookie()
        raw_cookie = self.get(b'Cookie', b'')
        if PY3 and isinstance(raw_cookie, bytes):
            # SimpleCookie wants text on Python 3.
            raw_cookie = raw_cookie.decode('ascii', 'replace')
        try:
            self.cookie.load(raw_cookie)
        except CookieError:
            pass  # XXX really?

    def __setitem__(self, name, value):
        """Checks for CRLF in ``value``, then calls the superclass method:

        .. automethod:: pando.http.mapping.CaseInsensitiveMapping.__setitem__
        """
        _check_for_CRLF(value)
        super(BaseHeaders, self).__setitem__(name, value)

    def add(self, name, value):
        """Checks for CRLF in ``value``, then calls the superclass method:

        .. automethod:: pando.http.mapping.CaseInsensitiveMapping.add
        """
        _check_for_CRLF(value)
        super(BaseHeaders, self).add(name, value)

    @property
    def raw(self):
        """Return the headers as a bytestring, formatted for an HTTP message.
        """
        lines = [
            header + b': ' + value
            for header, values in sorted(self.items())
            for value in values
        ]
        return b'\r\n'.join(lines)
def test_session_expiration_set_to_configured_lifetime(
        self, time_mock, utc_time_sans_frac_mock):
    """The session cookie expires after PERMANENT_SESSION_LIFETIME seconds.

    Both clock mocks are pinned to 2017-01-01, the token endpoint is stubbed
    to return an ID token, and after hitting ``/redirect_uri`` the
    ``expires`` attribute of the session cookie is compared with the
    configured session lifetime (one second of tolerance).
    """
    frozen_now = time.mktime(datetime(2017, 1, 1).timetuple())
    time_mock.return_value = frozen_now
    utc_time_sans_frac_mock.return_value = int(frozen_now)

    exp_time = 10
    state = 'test_state'
    nonce = 'test_nonce'
    claims = dict(
        iss=self.PROVIDER_BASEURL,
        aud=self.CLIENT_ID,
        sub='sub1',
        exp=int(frozen_now) + exp_time,
        iat=int(frozen_now),
        nonce=nonce,
    )
    id_token = IdToken(**claims)
    token_response = {
        'access_token': 'test',
        'token_type': 'Bearer',
        'id_token': id_token.to_jwt(),
    }
    token_endpoint = self.PROVIDER_BASEURL + '/token'
    responses.add(responses.POST, token_endpoint, json=token_response)

    session_lifetime = 1234
    self.app.config['PERMANENT_SESSION_LIFETIME'] = session_lifetime
    self.init_app(
        provider_metadata_extras={'token_endpoint': token_endpoint})

    with self.app.test_client() as client:
        # Seed the session with the values the redirect handler expects.
        with client.session_transaction() as session:
            UserSession(session, self.PROVIDER_NAME)
            session['destination'] = '/'
            session['state'] = state
            session['nonce'] = nonce
        resp = client.get('/redirect_uri?state={}&code=test'.format(state))

    jar = SimpleCookie()
    jar.load(resp.headers['Set-Cookie'])
    session_cookie = jar[self.app.config['SESSION_COOKIE_NAME']]
    expires_at = datetime.strptime(session_cookie['expires'],
                                   '%a, %d-%b-%Y %H:%M:%S GMT')
    remaining = expires_at - datetime.utcnow()
    cookie_lifetime = remaining.total_seconds()
    assert cookie_lifetime == pytest.approx(session_lifetime, abs=1)
def extract_macaroons(headers_or_request):
    ''' Returns an array of any macaroons found in the given headers.

    Macaroons are looked for in every cookie whose name starts with
    ``macaroon-`` and in each comma-separated entry of the ``Macaroons``
    header.  Entries that are not valid base64-encoded JSON are skipped.

    If the argument implements a get_header method, that will be used
    instead of the get method to retrieve headers.
    @param headers_or_request: dict of headers or a
    urllib.request.Request-like object.
    @return: A list of list of mpy macaroons
    '''
    def get_header(key, default=None):
        try:
            return headers_or_request.get_header(key, default)
        except AttributeError:
            return headers_or_request.get(key, default)

    collected = []

    def _collect(encoded):
        try:
            decoded = utils.b64decode(encoded)
            dicts = json.loads(decoded.decode('utf-8'))
        except ValueError:
            # Not a macaroon payload: ignore it.
            return
        collected.append([utils.macaroon_from_dict(d) for d in dicts])

    cookie_header = get_header('Cookie')
    if cookie_header is not None:
        jar = SimpleCookie()
        # The cookie might be a unicode object, so convert it
        # to ASCII. This may cause an exception under Python 2.
        # TODO is that a problem?
        jar.load(str(cookie_header))
        for name, morsel in jar.items():
            if name.startswith('macaroon-'):
                _collect(morsel.value)

    # Python doesn't make it easy to have multiple values for a
    # key, so split the header instead, which is necessary
    # for HTTP1.1 compatibility anyway (see RFC 7230, section 3.2.2)
    macaroon_header = get_header('Macaroons')
    if macaroon_header is not None:
        for entry in macaroon_header.split(','):
            _collect(entry)
    return collected
def extract_macaroons(headers):
    ''' Returns an array of any macaroons found in the given slice of cookies.

    All ``Cookie`` headers are parsed together; every cookie whose name
    starts with ``macaroon-`` is expected to hold base64-encoded JSON
    describing a list of macaroons.

    @param headers: headers object providing ``get_all``
    @return: An array of array of mpy macaroons
    '''
    jar = SimpleCookie()
    jar.load("\n".join(headers.get_all('Cookie', failobj=[])))

    mss = []
    for name, morsel in jar.items():
        if name.startswith('macaroon-'):
            payload = base64.b64decode(morsel.value)
            dicts = json.loads(payload.decode('utf-8'))
            group = []
            for d in dicts:
                # Each entry is re-serialized so the JSON serializer can
                # rebuild the macaroon from it.
                group.append(Macaroon.deserialize(
                    json.dumps(d), serializer=JsonSerializer()))
            mss.append(group)
    return mss
def _prepare_response(self, res_info, url):
    """Build a ``requests.Response`` from a response-description dict.

    Keys of ``res_info`` read here: ``url`` (defaults to the ``url``
    argument), ``status_code`` (defaults to 200), ``reason_phrase``
    (defaults to the ``REASON_PHRASES`` entry for the status code, or
    ``"UNKNOWN STATUS CODE"``), ``reason``, ``headers`` and ``content``
    (defaults to ``""``).  Cookies of a ``Set-Cookie`` header are copied
    into the response cookies.

    :param res_info: mapping describing the response to fabricate.
    :param url: fallback URL when ``res_info`` has none.
    :return: the populated ``requests.Response``.
    """
    import re  # local import: only needed to split a folded Set-Cookie

    res = requests.Response()
    res.url = res_info.get("url", url)
    res.status_code = res_info.get("status_code", 200)
    res.reason_phrase = res_info.get(
        "reason_phrase",
        REASON_PHRASES.get(res.status_code, "UNKNOWN STATUS CODE"))
    if "reason" in res_info:
        res.reason = res_info["reason"]

    if "headers" in res_info:
        headers = res_info["headers"]
        res.headers.update(headers)
        if "Set-Cookie" in headers:
            cookies = SimpleCookie()
            # An ``expires`` date ("Thu, 01-Jan-1970 ...") contains a comma,
            # so a plain split(",") would break it apart.  Split only on a
            # comma that introduces the next ``name=`` pair.
            for entry in re.split(r",(?=\s*[^;,=\s]+=)",
                                  headers["Set-Cookie"]):
                cookies.load(str(entry))
            res.cookies.update(cookies)

    res.raw = StreamContent(res_info.get("content", ""))
    return res
def __init__(self, autologin_url, crawler):
    """Read the autologin configuration from the crawler settings.

    When ``AUTOLOGIN_COOKIES`` is set, the cookies are parsed into
    ``self.auth_cookies`` (a list of ``{'name', 'value'}`` dicts) and the
    instance starts out as logged in; otherwise ``auth_cookies`` is None
    and ``logged_in`` is False.
    """
    settings = crawler.settings
    self.crawler = crawler

    # Settings that are set (truthy) are kept under their own name.
    forwarded = {}
    for name in ['SPLASH_URL', 'USER_AGENT', 'HTTP_PROXY', 'HTTPS_PROXY']:
        value = settings.get(name)
        if value:
            forwarded[name] = value
    self.passed_setting = forwarded

    self.autologin_url = autologin_url
    self.login_url = settings.get('AUTOLOGIN_LOGIN_URL')
    self.username = settings.get('AUTOLOGIN_USERNAME')
    self.password = settings.get('AUTOLOGIN_PASSWORD')
    self.extra_js = settings.get('AUTOLOGIN_EXTRA_JS')
    self.autologin_download_delay = settings.get('AUTOLOGIN_DOWNLOAD_DELAY')
    self.logout_url = settings.get('AUTOLOGIN_LOGOUT_URL')
    self.check_logout = settings.getbool('AUTOLOGIN_CHECK_LOGOUT', True)
    # _force_skip and _n_pend are for testing only
    self._force_skip = settings.getbool('_AUTOLOGIN_FORCE_SKIP')
    self._n_pend = settings.getint('_AUTOLOGIN_N_PEND')
    self._login_df = None
    self.max_logout_count = settings.getint('AUTOLOGIN_MAX_LOGOUT_COUNT', 4)
    auth_cookies = settings.get('AUTOLOGIN_COOKIES')
    self.skipped = False
    self.stats = crawler.stats

    if not auth_cookies:
        self.auth_cookies = None
        self.logged_in = False
        return

    jar = SimpleCookie()
    jar.load(auth_cookies)
    self.auth_cookies = []
    for morsel in jar.values():
        self.auth_cookies.append({'name': morsel.key, 'value': morsel.value})
    self.logged_in = True
class BaseHeaders(CaseInsensitiveMapping):
    """Represent the headers in an HTTP Request or Response message.

    `How to send non-English unicode string using HTTP header?
    <http://stackoverflow.com/q/5423223/>`_ and
    `What character encoding should I use for a HTTP header?
    <http://stackoverflow.com/q/4400678/>`_
    have good notes on why we do everything as pure bytes here.
    """

    def __init__(self, d):
        """Takes headers as a dict, list, or bytestring.

        A bytestring is parsed line by line; a line without a colon, or
        whose name has leading or trailing whitespace, raises
        ``MalformedHeader``.  The ``Cookie`` header, if any, is also parsed
        into ``self.cookie``.
        """
        if isinstance(d, bytes):
            from pando.exceptions import MalformedHeader

            def _parse_lines(raw):
                for line in raw.splitlines():
                    name, colon, value = line.partition(b':')
                    if not colon:
                        # no colon separator in header
                        raise MalformedHeader(line)
                    if name != name.strip():
                        # disallowed leading or trailing whitespace
                        # (per http://tools.ietf.org/html/rfc7230#section-3.2.4)
                        raise MalformedHeader(line)
                    yield name, value.strip()

            headers = _parse_lines(d)
        else:
            headers = d
        CaseInsensitiveMapping.__init__(self, headers)

        # Cookie
        # ======
        self.cookie = SimpleCookie()
        raw_cookie = self.get(b'Cookie', b'')
        if PY3 and isinstance(raw_cookie, bytes):
            # SimpleCookie wants text on Python 3.
            raw_cookie = raw_cookie.decode('ascii', 'replace')
        try:
            self.cookie.load(raw_cookie)
        except CookieError:
            pass  # XXX really?

    def __setitem__(self, name, value):
        """Checks for CRLF in ``value``, then calls the superclass method:

        .. automethod:: pando.http.mapping.CaseInsensitiveMapping.__setitem__
        """
        _check_for_CRLF(value)
        super(BaseHeaders, self).__setitem__(name, value)

    def add(self, name, value):
        """Checks for CRLF in ``value``, then calls the superclass method:

        .. automethod:: pando.http.mapping.CaseInsensitiveMapping.add
        """
        _check_for_CRLF(value)
        super(BaseHeaders, self).add(name, value)

    @property
    def raw(self):
        """Return the headers as a bytestring, formatted for an HTTP message.
        """
        lines = [
            header + b': ' + value
            for header, values in sorted(self.items())
            for value in values
        ]
        return b'\r\n'.join(lines)
def call_wsgi_app(wsgi_app, request, path_info):
    """
    Call the ``wsgi_app`` with ``request`` and return its response.

    :param wsgi_app: The WSGI application to be run.
    :type wsgi_app: callable
    :param request: The Django request.
    :type request: :class:`django_wsgi.handler.DjangoWSGIRequest`
    :param path_info: The ``PATH_INFO`` to be used by the WSGI application.
    :type path_info: :class:`basestring`
    :raises django_wsgi.exc.ApplicationCallError: If ``path_info`` is not the
        last portion of the ``PATH_INFO`` in ``request``.
    :return: The response from the WSGI application, turned into a Django
        response.
    :rtype: :class:`django.http.HttpResponse`

    """
    webob_request = request.webob
    new_request = webob_request.copy()

    # Moving the portion of the path consumed by the current view, from the
    # PATH_INFO to the SCRIPT_NAME:
    if not request.path_info.endswith(path_info):
        raise ApplicationCallError("Path %s is not the last portion of the "
                                   "PATH_INFO in the original request (%s)"
                                   % (path_info, request.path_info))
    # An explicit end index is used because ``[:-len(path_info)]`` would be
    # ``[:-0]`` (i.e. the empty string) when ``path_info`` is empty, instead
    # of the whole original path.
    consumed_length = len(request.path_info) - len(path_info)
    consumed_path = request.path_info[:consumed_length]
    new_request.path_info = path_info
    new_request.script_name = webob_request.script_name + consumed_path

    # If the user has been authenticated in Django, log him in the WSGI app.
    # ``is_authenticated`` is a method in some Django versions and a plain
    # attribute in others, so it is only called when it is callable:
    is_authenticated = request.user.is_authenticated
    if callable(is_authenticated):
        is_authenticated = is_authenticated()
    if is_authenticated:
        new_request.remote_user = request.user.username

    # Cleaning the routing_args, if any. The application should have its own
    # arguments, without relying on any arguments from a parent application:
    if "wsgiorg.routing_args" in request.environ:
        del new_request.environ['wsgiorg.routing_args']
    # And the same for the WebOb ad-hoc attributes:
    if "webob.adhoc_attrs" in request.environ:
        del new_request.environ['webob.adhoc_attrs']

    # Calling the WSGI application and getting its response:
    (status_line, headers, body) = new_request.call_application(wsgi_app)

    status_code_raw = status_line.split(" ", 1)[0]
    status_code = int(status_code_raw)

    # Turning its response into a Django response:
    cookies = SimpleCookie()
    django_response = HttpResponse(body, status=status_code)
    for (header, value) in headers:
        if header.upper() == "SET-COOKIE":
            if PY2 and isinstance(value, text_type):
                # It can't be Unicode:
                value = value.encode("us-ascii")
            cookies.load(value)
        else:
            django_response[header] = value

    # Setting the cookies from Django:
    for (cookie_name, cookie) in cookies.items():
        cookie_attributes = {
            'key': cookie_name,
            'value': cookie.value,
            'expires': cookie['expires'],
            'path': cookie['path'],
            'domain': cookie['domain'],
        }
        if cookie['max-age']:
            # Starting in Django 1.3 it performs arithmetic operations
            # with 'Max-Age'
            cookie_attributes['max_age'] = int(cookie['max-age'])
        # Keep the security flags set by the WSGI application; they are only
        # passed along when present so the defaults stay untouched otherwise.
        if cookie['secure']:
            cookie_attributes['secure'] = True
        if cookie['httponly']:
            cookie_attributes['httponly'] = True

        django_response.set_cookie(**cookie_attributes)
    return django_response
class Request(object): """An HTTP request. This object represents the metadata of an HTTP request message; that is, it contains attributes which describe the environment in which the request URL, headers, and body were sent (if you want tools to interpret the headers and body, those are elsewhere, mostly in Tools). This 'metadata' consists of socket data, transport characteristics, and the Request-Line. This object also contains data regarding the configuration in effect for the given URL, and the execution plan for generating a response. """ prev = None """ The previous Request object (if any). This should be None unless we are processing an InternalRedirect.""" # Conversation/connection attributes local = httputil.Host('127.0.0.1', 80) 'An httputil.Host(ip, port, hostname) object for the server socket.' remote = httputil.Host('127.0.0.1', 1111) 'An httputil.Host(ip, port, hostname) object for the client socket.' scheme = 'http' """ The protocol used between client and server. In most cases, this will be either 'http' or 'https'.""" server_protocol = 'HTTP/1.1' """ The HTTP version for which the HTTP server is at least conditionally compliant.""" base = '' """The (scheme://host) portion of the requested URL. In some cases (e.g. when proxying via mod_rewrite), this may contain path segments which cherrypy.url uses when constructing url's, but which otherwise are ignored by CherryPy. Regardless, this value MUST NOT end in a slash.""" # Request-Line attributes request_line = '' """ The complete Request-Line received from the client. This is a single string consisting of the request method, URI, and protocol version (joined by spaces). Any final CRLF is removed.""" method = 'GET' """ Indicates the HTTP method to be performed on the resource identified by the Request-URI. Common methods include GET, HEAD, POST, PUT, and DELETE. CherryPy allows any extension method; however, various HTTP servers and gateways may restrict the set of allowable methods. 
CherryPy applications SHOULD restrict the set (on a per-URI basis).""" query_string = '' """ The query component of the Request-URI, a string of information to be interpreted by the resource. The query portion of a URI follows the path component, and is separated by a '?'. For example, the URI 'http://www.cherrypy.org/wiki?a=3&b=4' has the query component, 'a=3&b=4'.""" query_string_encoding = 'utf8' """ The encoding expected for query string arguments after % HEX HEX decoding). If a query string is provided that cannot be decoded with this encoding, 404 is raised (since technically it's a different URI). If you want arbitrary encodings to not error, set this to 'Latin-1'; you can then encode back to bytes and re-decode to whatever encoding you like later. """ protocol = (1, 1) """The HTTP protocol version corresponding to the set of features which should be allowed in the response. If BOTH the client's request message AND the server's level of HTTP compliance is HTTP/1.1, this attribute will be the tuple (1, 1). If either is 1.0, this attribute will be the tuple (1, 0). Lower HTTP protocol versions are not explicitly supported.""" params = {} """ A dict which combines query string (GET) and request entity (POST) variables. This is populated in two stages: GET params are added before the 'on_start_resource' hook, and POST params are added between the 'before_request_body' and 'before_handler' hooks.""" # Message attributes header_list = [] """ A list of the HTTP request headers as (name, value) tuples. In general, you should use request.headers (a dict) instead.""" headers = httputil.HeaderMap() """ A dict-like object containing the request headers. Keys are header names (in Title-Case format); however, you may get and set them in a case-insensitive manner. That is, headers['Content-Type'] and headers['content-type'] refer to the same value. Values are header values (decoded according to :rfc:`2047` if necessary). 
See also: httputil.HeaderMap, httputil.HeaderElement.""" cookie = SimpleCookie() """See help(Cookie).""" rfile = None """ If the request included an entity (body), it will be available as a stream in this attribute. However, the rfile will normally be read for you between the 'before_request_body' hook and the 'before_handler' hook, and the resulting string is placed into either request.params or the request.body attribute. You may disable the automatic consumption of the rfile by setting request.process_request_body to False, either in config for the desired path, or in an 'on_start_resource' or 'before_request_body' hook. WARNING: In almost every case, you should not attempt to read from the rfile stream after CherryPy's automatic mechanism has read it. If you turn off the automatic parsing of rfile, you should read exactly the number of bytes specified in request.headers['Content-Length']. Ignoring either of these warnings may result in a hung request thread or in corruption of the next (pipelined) request. """ process_request_body = True """ If True, the rfile (if any) is automatically read and parsed, and the result placed into request.params or request.body.""" methods_with_bodies = ('POST', 'PUT', 'PATCH') """ A sequence of HTTP methods for which CherryPy will automatically attempt to read a body from the rfile. If you are going to change this property, modify it on the configuration (recommended) or on the "hook point" `on_start_resource`. """ body = None """ If the request Content-Type is 'application/x-www-form-urlencoded' or multipart, this will be None. 
Otherwise, this will be an instance of :class:`RequestBody<cherrypy._cpreqbody.RequestBody>` (which you can .read()); this value is set between the 'before_request_body' and 'before_handler' hooks (assuming that process_request_body is True).""" # Dispatch attributes dispatch = cherrypy.dispatch.Dispatcher() """ The object which looks up the 'page handler' callable and collects config for the current request based on the path_info, other request attributes, and the application architecture. The core calls the dispatcher as early as possible, passing it a 'path_info' argument. The default dispatcher discovers the page handler by matching path_info to a hierarchical arrangement of objects, starting at request.app.root. See help(cherrypy.dispatch) for more information.""" script_name = '' """ The 'mount point' of the application which is handling this request. This attribute MUST NOT end in a slash. If the script_name refers to the root of the URI, it MUST be an empty string (not "/"). """ path_info = '/' """ The 'relative path' portion of the Request-URI. This is relative to the script_name ('mount point') of the application which is handling this request.""" login = None """ When authentication is used during the request processing this is set to 'False' if it failed and to the 'username' value if it succeeded. The default 'None' implies that no authentication happened.""" # Note that cherrypy.url uses "if request.app:" to determine whether # the call is during a real HTTP request or not. So leave this None. app = None """The cherrypy.Application object which is handling this request.""" handler = None """ The function, method, or other callable which CherryPy will call to produce the response. The discovery of the handler and the arguments it will receive are determined by the request.dispatch object. 
By default, the handler is discovered by walking a tree of objects starting at request.app.root, and is then passed all HTTP params (from the query string and POST body) as keyword arguments.""" toolmaps = {} """ A nested dict of all Toolboxes and Tools in effect for this request, of the form: {Toolbox.namespace: {Tool.name: config dict}}.""" config = None """ A flat dict of all configuration entries which apply to the current request. These entries are collected from global config, application config (based on request.path_info), and from handler config (exactly how is governed by the request.dispatch object in effect for this request; by default, handler config can be attached anywhere in the tree between request.app.root and the final handler, and inherits downward).""" is_index = None """ This will be True if the current request is mapped to an 'index' resource handler (also, a 'default' handler if path_info ends with a slash). The value may be used to automatically redirect the user-agent to a 'more canonical' URL which either adds or removes the trailing slash. See cherrypy.tools.trailing_slash.""" hooks = HookMap(hookpoints) """ A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}. Each key is a str naming the hook point, and each value is a list of hooks which will be called at that hook point during this request. The list of hooks is generally populated as early as possible (mostly from Tools specified in config), but may be extended at any time. See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools.""" error_response = cherrypy.HTTPError(500).set_response """ The no-arg callable which will handle unexpected, untrapped errors during request processing. This is not used for expected exceptions (like NotFound, HTTPError, or HTTPRedirect) which are raised in response to expected conditions (those should be customized either via request.error_page or by overriding HTTPError.set_response). 
By default, error_response uses HTTPError(500) to return a generic error response to the user-agent.""" error_page = {} """ A dict of {error code: response filename or callable} pairs. The error code must be an int representing a given HTTP error code, or the string 'default', which will be used if no matching entry is found for a given numeric code. If a filename is provided, the file should contain a Python string- formatting template, and can expect by default to receive format values with the mapping keys %(status)s, %(message)s, %(traceback)s, and %(version)s. The set of format mappings can be extended by overriding HTTPError.set_response. If a callable is provided, it will be called by default with keyword arguments 'status', 'message', 'traceback', and 'version', as for a string-formatting template. The callable must return a string or iterable of strings which will be set to response.body. It may also override headers or perform any other processing. If no entry is given for an error code, and no 'default' entry exists, a default template will be used. """ show_tracebacks = True """ If True, unexpected errors encountered during request processing will include a traceback in the response body.""" show_mismatched_params = True """ If True, mismatched parameters encountered during PageHandler invocation processing will be included in the response body.""" throws = (KeyboardInterrupt, SystemExit, cherrypy.InternalRedirect) """The sequence of exceptions which Request.run does not trap.""" throw_errors = False """ If True, Request.run will not trap any errors (except HTTPRedirect and HTTPError, which are more properly called 'exceptions', not errors).""" closed = False """True once the close method has been called, False otherwise.""" stage = None """ A string containing the stage reached in the request-handling process. 
This is useful when debugging a live server with hung requests.""" unique_id = None """A lazy object generating and memorizing UUID4 on ``str()`` render.""" namespaces = reprconf.NamespaceSet( **{'hooks': hooks_namespace, 'request': request_namespace, 'response': response_namespace, 'error_page': error_page_namespace, 'tools': cherrypy.tools, }) def __init__(self, local_host, remote_host, scheme='http', server_protocol='HTTP/1.1'): """Populate a new Request object. local_host should be an httputil.Host object with the server info. remote_host should be an httputil.Host object with the client info. scheme should be a string, either "http" or "https". """ self.local = local_host self.remote = remote_host self.scheme = scheme self.server_protocol = server_protocol self.closed = False # Put a *copy* of the class error_page into self. self.error_page = self.error_page.copy() # Put a *copy* of the class namespaces into self. self.namespaces = self.namespaces.copy() self.stage = None self.unique_id = LazyUUID4() def close(self): """Run cleanup code. (Core)""" if not self.closed: self.closed = True self.stage = 'on_end_request' self.hooks.run('on_end_request') self.stage = 'close' def run(self, method, path, query_string, req_protocol, headers, rfile): r"""Process the Request. (Core) method, path, query_string, and req_protocol should be pulled directly from the Request-Line (e.g. "GET /path?key=val HTTP/1.0"). path This should be %XX-unquoted, but query_string should not be. When using Python 2, they both MUST be byte strings, not unicode strings. When using Python 3, they both MUST be unicode strings, not byte strings, and preferably not bytes \x00-\xFF disguised as unicode. headers A list of (name, value) tuples. rfile A file-like object containing the HTTP request entity. When run() is done, the returned object should have 3 attributes: * status, e.g. 
"200 OK" * header_list, a list of (name, value) tuples * body, an iterable yielding strings Consumer code (HTTP servers) should then access these response attributes to build the outbound stream. """ response = cherrypy.serving.response self.stage = 'run' try: self.error_response = cherrypy.HTTPError(500).set_response self.method = method path = path or '/' self.query_string = query_string or '' self.params = {} # Compare request and server HTTP protocol versions, in case our # server does not support the requested protocol. Limit our output # to min(req, server). We want the following output: # request server actual written supported response # protocol protocol response protocol feature set # a 1.0 1.0 1.0 1.0 # b 1.0 1.1 1.1 1.0 # c 1.1 1.0 1.0 1.0 # d 1.1 1.1 1.1 1.1 # Notice that, in (b), the response will be "HTTP/1.1" even though # the client only understands 1.0. RFC 2616 10.5.6 says we should # only return 505 if the _major_ version is different. rp = int(req_protocol[5]), int(req_protocol[7]) sp = int(self.server_protocol[5]), int(self.server_protocol[7]) self.protocol = min(rp, sp) response.headers.protocol = self.protocol # Rebuild first line of the request (e.g. "GET /path HTTP/1.0"). url = path if query_string: url += '?' + query_string self.request_line = '%s %s %s' % (method, url, req_protocol) self.header_list = list(headers) self.headers = httputil.HeaderMap() self.rfile = rfile self.body = None self.cookie = SimpleCookie() self.handler = None # path_info should be the path from the # app root (script_name) to the handler. self.script_name = self.app.script_name self.path_info = pi = path[len(self.script_name):] self.stage = 'respond' self.respond(pi) except self.throws: raise except Exception: if self.throw_errors: raise else: # Failure in setup, error handler or finalize. Bypass them. # Can't use handle_error because we may not have hooks yet. 
cherrypy.log(traceback=True, severity=40) if self.show_tracebacks: body = format_exc() else: body = '' r = bare_error(body) response.output_status, response.header_list, response.body = r if self.method == 'HEAD': # HEAD requests MUST NOT return a message-body in the response. response.body = [] try: cherrypy.log.access() except Exception: cherrypy.log.error(traceback=True) return response def respond(self, path_info): """Generate a response for the resource at self.path_info. (Core)""" try: try: try: self._do_respond(path_info) except (cherrypy.HTTPRedirect, cherrypy.HTTPError): inst = sys.exc_info()[1] inst.set_response() self.stage = 'before_finalize (HTTPError)' self.hooks.run('before_finalize') cherrypy.serving.response.finalize() finally: self.stage = 'on_end_resource' self.hooks.run('on_end_resource') except self.throws: raise except Exception: if self.throw_errors: raise self.handle_error() def _do_respond(self, path_info): response = cherrypy.serving.response if self.app is None: raise cherrypy.NotFound() self.hooks = self.__class__.hooks.copy() self.toolmaps = {} # Get the 'Host' header, so we can HTTPRedirect properly. 
self.stage = 'process_headers' self.process_headers() self.stage = 'get_resource' self.get_resource(path_info) self.body = _cpreqbody.RequestBody( self.rfile, self.headers, request_params=self.params) self.namespaces(self.config) self.stage = 'on_start_resource' self.hooks.run('on_start_resource') # Parse the querystring self.stage = 'process_query_string' self.process_query_string() # Process the body if self.process_request_body: if self.method not in self.methods_with_bodies: self.process_request_body = False self.stage = 'before_request_body' self.hooks.run('before_request_body') if self.process_request_body: self.body.process() # Run the handler self.stage = 'before_handler' self.hooks.run('before_handler') if self.handler: self.stage = 'handler' response.body = self.handler() # Finalize self.stage = 'before_finalize' self.hooks.run('before_finalize') response.finalize() def process_query_string(self): """Parse the query string into Python structures. (Core)""" try: p = httputil.parse_query_string( self.query_string, encoding=self.query_string_encoding) except UnicodeDecodeError: raise cherrypy.HTTPError( 404, 'The given query string could not be processed. Query ' 'strings for this resource must be encoded with %r.' % self.query_string_encoding) # Python 2 only: keyword arguments must be byte strings (type 'str'). if six.PY2: for key, value in p.items(): if isinstance(key, six.text_type): del p[key] p[key.encode(self.query_string_encoding)] = value self.params.update(p) def process_headers(self): """Parse HTTP header data into Python structures. (Core)""" # Process the headers into self.headers headers = self.headers for name, value in self.header_list: # Call title() now (and use dict.__method__(headers)) # so title doesn't have to be called twice. name = name.title() value = value.strip() headers[name] = httputil.decode_TEXT_maybe(value) # Some clients, notably Konquoror, supply multiple # cookies on different lines with the same key. 
To # handle this case, store all cookies in self.cookie. if name == 'Cookie': try: self.cookie.load(value) except CookieError as exc: raise cherrypy.HTTPError(400, str(exc)) if not dict.__contains__(headers, 'Host'): # All Internet-based HTTP/1.1 servers MUST respond with a 400 # (Bad Request) status code to any HTTP/1.1 request message # which lacks a Host header field. if self.protocol >= (1, 1): msg = "HTTP/1.1 requires a 'Host' request header." raise cherrypy.HTTPError(400, msg) host = dict.get(headers, 'Host') if not host: host = self.local.name or self.local.ip self.base = '%s://%s' % (self.scheme, host) def get_resource(self, path): """Call a dispatcher (which sets self.handler and .config). (Core)""" # First, see if there is a custom dispatch at this URI. Custom # dispatchers can only be specified in app.config, not in _cp_config # (since custom dispatchers may not even have an app.root). dispatch = self.app.find_config( path, 'request.dispatch', self.dispatch) # dispatch() should set self.handler and self.config dispatch(path) def handle_error(self): """Handle the last unanticipated exception. (Core)""" try: self.hooks.run('before_error_response') if self.error_response: self.error_response() self.hooks.run('after_error_response') cherrypy.serving.response.finalize() except cherrypy.HTTPRedirect: inst = sys.exc_info()[1] inst.set_response() cherrypy.serving.response.finalize()
class Request(object):

    """An HTTP request.

    This object represents the metadata of an HTTP request message;
    that is, it contains attributes which describe the environment
    in which the request URL, headers, and body were sent (if you
    want tools to interpret the headers and body, those are elsewhere,
    mostly in Tools). This 'metadata' consists of socket data,
    transport characteristics, and the Request-Line. This object
    also contains data regarding the configuration in effect for
    the given URL, and the execution plan for generating a response.
    """

    # NOTE: the class-level values below act as defaults. __init__ and run()
    # rebind most of them on the instance for each request.

    prev = None
    """
    The previous Request object (if any). This should be None
    unless we are processing an InternalRedirect."""

    # Conversation/connection attributes
    local = httputil.Host('127.0.0.1', 80)
    'An httputil.Host(ip, port, hostname) object for the server socket.'

    remote = httputil.Host('127.0.0.1', 1111)
    'An httputil.Host(ip, port, hostname) object for the client socket.'

    scheme = 'http'
    """
    The protocol used between client and server. In most cases,
    this will be either 'http' or 'https'."""

    server_protocol = 'HTTP/1.1'
    """
    The HTTP version for which the HTTP server is at least
    conditionally compliant."""

    base = ''
    """The (scheme://host) portion of the requested URL.
    In some cases (e.g. when proxying via mod_rewrite), this may contain
    path segments which cherrypy.url uses when constructing url's, but
    which otherwise are ignored by CherryPy. Regardless, this value
    MUST NOT end in a slash."""

    # Request-Line attributes
    request_line = ''
    """
    The complete Request-Line received from the client. This is a
    single string consisting of the request method, URI, and protocol
    version (joined by spaces). Any final CRLF is removed."""

    method = 'GET'
    """
    Indicates the HTTP method to be performed on the resource identified
    by the Request-URI. Common methods include GET, HEAD, POST, PUT, and
    DELETE. CherryPy allows any extension method; however, various HTTP
    servers and gateways may restrict the set of allowable methods.
    CherryPy applications SHOULD restrict the set (on a per-URI basis)."""

    query_string = ''
    """
    The query component of the Request-URI, a string of information to be
    interpreted by the resource. The query portion of a URI follows the
    path component, and is separated by a '?'. For example, the URI
    'http://www.cherrypy.org/wiki?a=3&b=4' has the query component,
    'a=3&b=4'."""

    query_string_encoding = 'utf8'
    """
    The encoding expected for query string arguments after % HEX HEX decoding).
    If a query string is provided that cannot be decoded with this encoding,
    404 is raised (since technically it's a different URI). If you want
    arbitrary encodings to not error, set this to 'Latin-1'; you can then
    encode back to bytes and re-decode to whatever encoding you like later.
    """

    protocol = (1, 1)
    """The HTTP protocol version corresponding to the set of features
    which should be allowed in the response. If BOTH the client's request
    message AND the server's level of HTTP compliance is HTTP/1.1, this
    attribute will be the tuple (1, 1). If either is 1.0, this attribute
    will be the tuple (1, 0). Lower HTTP protocol versions are not
    explicitly supported."""

    # NOTE(review): _do_respond() calls process_query_string() *after* running
    # the 'on_start_resource' hooks, so the ordering described in the string
    # below looks stale -- confirm before relying on it.
    params = {}
    """
    A dict which combines query string (GET) and request entity (POST)
    variables. This is populated in two stages: GET params are added
    before the 'on_start_resource' hook, and POST params are added
    between the 'before_request_body' and 'before_handler' hooks."""

    # Message attributes
    header_list = []
    """
    A list of the HTTP request headers as (name, value) tuples.
    In general, you should use request.headers (a dict) instead."""

    headers = httputil.HeaderMap()
    """
    A dict-like object containing the request headers. Keys are header
    names (in Title-Case format); however, you may get and set them in
    a case-insensitive manner. That is, headers['Content-Type'] and
    headers['content-type'] refer to the same value. Values are header
    values (decoded according to :rfc:`2047` if necessary). See also:
    httputil.HeaderMap, httputil.HeaderElement."""

    cookie = SimpleCookie()
    """See help(Cookie)."""

    rfile = None
    """
    If the request included an entity (body), it will be available
    as a stream in this attribute. However, the rfile will normally
    be read for you between the 'before_request_body' hook and the
    'before_handler' hook, and the resulting string is placed into
    either request.params or the request.body attribute.

    You may disable the automatic consumption of the rfile by setting
    request.process_request_body to False, either in config for the desired
    path, or in an 'on_start_resource' or 'before_request_body' hook.

    WARNING: In almost every case, you should not attempt to read from the
    rfile stream after CherryPy's automatic mechanism has read it. If you
    turn off the automatic parsing of rfile, you should read exactly the
    number of bytes specified in request.headers['Content-Length'].
    Ignoring either of these warnings may result in a hung request thread
    or in corruption of the next (pipelined) request.
    """

    process_request_body = True
    """
    If True, the rfile (if any) is automatically read and parsed,
    and the result placed into request.params or request.body."""

    methods_with_bodies = ('POST', 'PUT', 'PATCH')
    """
    A sequence of HTTP methods for which CherryPy will automatically
    attempt to read a body from the rfile. If you are going to change
    this property, modify it on the configuration (recommended)
    or on the "hook point" `on_start_resource`.
    """

    # NOTE(review): run() resets this to None and _do_respond() then assigns a
    # _cpreqbody.RequestBody unconditionally, right after get_resource() and
    # before the 'on_start_resource' hooks. The string below describes a
    # different contract -- confirm which one is current.
    body = None
    """
    If the request Content-Type is 'application/x-www-form-urlencoded'
    or multipart, this will be None. Otherwise, this will be an instance
    of :class:`RequestBody<cherrypy._cpreqbody.RequestBody>` (which you
    can .read()); this value is set between the 'before_request_body' and
    'before_handler' hooks (assuming that process_request_body is True)."""

    # Dispatch attributes
    dispatch = cherrypy.dispatch.Dispatcher()
    """
    The object which looks up the 'page handler' callable and collects
    config for the current request based on the path_info, other
    request attributes, and the application architecture. The core
    calls the dispatcher as early as possible, passing it a 'path_info'
    argument.

    The default dispatcher discovers the page handler by matching path_info
    to a hierarchical arrangement of objects, starting at request.app.root.
    See help(cherrypy.dispatch) for more information."""

    script_name = ''
    """
    The 'mount point' of the application which is handling this request.

    This attribute MUST NOT end in a slash. If the script_name refers to
    the root of the URI, it MUST be an empty string (not "/").
    """

    path_info = '/'
    """
    The 'relative path' portion of the Request-URI. This is relative
    to the script_name ('mount point') of the application which is
    handling this request."""

    login = None
    """
    When authentication is used during the request processing this is
    set to 'False' if it failed and to the 'username' value if it succeeded.
    The default 'None' implies that no authentication happened."""

    # Note that cherrypy.url uses "if request.app:" to determine whether
    # the call is during a real HTTP request or not. So leave this None.
    app = None
    """The cherrypy.Application object which is handling this request."""

    handler = None
    """
    The function, method, or other callable which CherryPy will call to
    produce the response. The discovery of the handler and the arguments
    it will receive are determined by the request.dispatch object.
    By default, the handler is discovered by walking a tree of objects
    starting at request.app.root, and is then passed all HTTP params
    (from the query string and POST body) as keyword arguments."""

    toolmaps = {}
    """
    A nested dict of all Toolboxes and Tools in effect for this request,
    of the form: {Toolbox.namespace: {Tool.name: config dict}}."""

    config = None
    """
    A flat dict of all configuration entries which apply to the
    current request. These entries are collected from global config,
    application config (based on request.path_info), and from handler
    config (exactly how is governed by the request.dispatch object in
    effect for this request; by default, handler config can be attached
    anywhere in the tree between request.app.root and the final handler,
    and inherits downward)."""

    is_index = None
    """
    This will be True if the current request is mapped to an 'index'
    resource handler (also, a 'default' handler if path_info ends with
    a slash). The value may be used to automatically redirect the
    user-agent to a 'more canonical' URL which either adds or removes
    the trailing slash. See cherrypy.tools.trailing_slash."""

    hooks = HookMap(hookpoints)
    """
    A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}.
    Each key is a str naming the hook point, and each value is a list
    of hooks which will be called at that hook point during this request.
    The list of hooks is generally populated as early as possible (mostly
    from Tools specified in config), but may be extended at any time.
    See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools."""

    error_response = cherrypy.HTTPError(500).set_response
    """
    The no-arg callable which will handle unexpected, untrapped errors
    during request processing. This is not used for expected exceptions
    (like NotFound, HTTPError, or HTTPRedirect) which are raised in
    response to expected conditions (those should be customized either
    via request.error_page or by overriding HTTPError.set_response).
    By default, error_response uses HTTPError(500) to return a generic
    error response to the user-agent."""

    error_page = {}
    """
    A dict of {error code: response filename or callable} pairs.

    The error code must be an int representing a given HTTP error code,
    or the string 'default', which will be used if no matching entry
    is found for a given numeric code.

    If a filename is provided, the file should contain a Python string-
    formatting template, and can expect by default to receive format
    values with the mapping keys %(status)s, %(message)s, %(traceback)s,
    and %(version)s. The set of format mappings can be extended by
    overriding HTTPError.set_response.

    If a callable is provided, it will be called by default with keyword
    arguments 'status', 'message', 'traceback', and 'version', as for a
    string-formatting template. The callable must return a string or
    iterable of strings which will be set to response.body. It may also
    override headers or perform any other processing.

    If no entry is given for an error code, and no 'default' entry exists,
    a default template will be used.
    """

    show_tracebacks = True
    """
    If True, unexpected errors encountered during request processing will
    include a traceback in the response body."""

    show_mismatched_params = True
    """
    If True, mismatched parameters encountered during PageHandler invocation
    processing will be included in the response body."""

    throws = (KeyboardInterrupt, SystemExit, cherrypy.InternalRedirect)
    """The sequence of exceptions which Request.run does not trap."""

    throw_errors = False
    """
    If True, Request.run will not trap any errors (except HTTPRedirect and
    HTTPError, which are more properly called 'exceptions', not errors)."""

    closed = False
    """True once the close method has been called, False otherwise."""

    stage = None
    """
    A string containing the stage reached in the request-handling process.
    This is useful when debugging a live server with hung requests."""

    unique_id = None
    """A lazy object generating and memorizing UUID4 on ``str()`` render."""

    # Config namespace handlers; _do_respond() applies them to self.config
    # via self.namespaces(self.config).
    namespaces = reprconf.NamespaceSet(
        **{'hooks': hooks_namespace,
           'request': request_namespace,
           'response': response_namespace,
           'error_page': error_page_namespace,
           'tools': cherrypy.tools,
           })

    def __init__(self, local_host, remote_host, scheme='http',
                 server_protocol='HTTP/1.1'):
        """Populate a new Request object.

        local_host should be an httputil.Host object with the server info.
        remote_host should be an httputil.Host object with the client info.
        scheme should be a string, either "http" or "https".
        server_protocol is stored as self.server_protocol; run() reads the
        characters at index 5 and 7 of it as the major/minor version digits.
        """
        self.local = local_host
        self.remote = remote_host
        self.scheme = scheme
        self.server_protocol = server_protocol

        self.closed = False

        # Put a *copy* of the class error_page into self.
        self.error_page = self.error_page.copy()

        # Put a *copy* of the class namespaces into self.
        self.namespaces = self.namespaces.copy()

        self.stage = None

        self.unique_id = LazyUUID4()

    def close(self):
        """Run cleanup code. (Core)

        Runs the 'on_end_request' hooks the first time it is called;
        later calls do nothing because self.closed is already True.
        """
        if not self.closed:
            # Mark closed before running hooks so a re-entrant close()
            # does not run them again.
            self.closed = True
            self.stage = 'on_end_request'
            self.hooks.run('on_end_request')
            self.stage = 'close'

    def run(self, method, path, query_string, req_protocol, headers, rfile):
        r"""Process the Request. (Core)

        method, path, query_string, and req_protocol should be pulled directly
        from the Request-Line (e.g. "GET /path?key=val HTTP/1.0").

        path
            This should be %XX-unquoted, but query_string should not be.

            When using Python 2, they both MUST be byte strings,
            not unicode strings.

            When using Python 3, they both MUST be unicode strings,
            not byte strings, and preferably not bytes \x00-\xFF
            disguised as unicode.

        headers
            A list of (name, value) tuples.

        rfile
            A file-like object containing the HTTP request entity.

        When run() is done, the returned object should have 3 attributes:

          * status, e.g. "200 OK"
          * header_list, a list of (name, value) tuples
          * body, an iterable yielding strings

        Consumer code (HTTP servers) should then access these response
        attributes to build the outbound stream.

        Exceptions listed in self.throws are always re-raised; any other
        exception is re-raised only when self.throw_errors is true, and is
        otherwise logged and replaced by a bare error response.
        """
        response = cherrypy.serving.response
        self.stage = 'run'
        try:
            self.error_response = cherrypy.HTTPError(500).set_response

            self.method = method
            path = path or '/'
            self.query_string = query_string or ''
            self.params = {}

            # Compare request and server HTTP protocol versions, in case our
            # server does not support the requested protocol. Limit our output
            # to min(req, server). We want the following output:
            #     request    server     actual written   supported response
            #     protocol   protocol  response protocol    feature set
            # a     1.0        1.0           1.0                1.0
            # b     1.0        1.1           1.1                1.0
            # c     1.1        1.0           1.0                1.0
            # d     1.1        1.1           1.1                1.1
            # Notice that, in (b), the response will be "HTTP/1.1" even though
            # the client only understands 1.0. RFC 2616 10.5.6 says we should
            # only return 505 if the _major_ version is different.
            # In a string shaped like "HTTP/x.y", index 5 is the major digit
            # and index 7 is the minor digit.
            rp = int(req_protocol[5]), int(req_protocol[7])
            sp = int(self.server_protocol[5]), int(self.server_protocol[7])
            self.protocol = min(rp, sp)
            response.headers.protocol = self.protocol

            # Rebuild first line of the request (e.g. "GET /path HTTP/1.0").
            url = path
            if query_string:
                url += '?' + query_string
            self.request_line = '%s %s %s' % (method, url, req_protocol)

            self.header_list = list(headers)
            self.headers = httputil.HeaderMap()

            self.rfile = rfile
            self.body = None

            self.cookie = SimpleCookie()
            self.handler = None

            # path_info should be the path from the
            # app root (script_name) to the handler.
            self.script_name = self.app.script_name
            self.path_info = pi = path[len(self.script_name):]

            self.stage = 'respond'
            self.respond(pi)

        except self.throws:
            raise
        except Exception:
            if self.throw_errors:
                raise
            else:
                # Failure in setup, error handler or finalize. Bypass them.
                # Can't use handle_error because we may not have hooks yet.
                cherrypy.log(traceback=True, severity=40)
                if self.show_tracebacks:
                    body = format_exc()
                else:
                    body = ''
                r = bare_error(body)
                response.output_status, response.header_list, response.body = r

        if self.method == 'HEAD':
            # HEAD requests MUST NOT return a message-body in the response.
            response.body = []

        # A failure while writing the access log is itself logged and must
        # not prevent the response from being returned.
        try:
            cherrypy.log.access()
        except Exception:
            cherrypy.log.error(traceback=True)

        return response

    # Uncomment for stage debugging
    # stage = property(lambda self: self._stage, lambda self, v: print(v))

    def respond(self, path_info):
        """Generate a response for the resource at self.path_info. (Core)

        Wraps _do_respond(): HTTPRedirect/HTTPError set the response and are
        finalized here, the 'on_end_resource' hooks always run, and any other
        exception not in self.throws goes to handle_error() unless
        self.throw_errors is true.
        """
        try:
            try:
                try:
                    self._do_respond(path_info)
                except (cherrypy.HTTPRedirect, cherrypy.HTTPError):
                    # Expected control-flow exceptions: let the exception
                    # object populate the response, then finalize as usual.
                    inst = sys.exc_info()[1]
                    inst.set_response()
                    self.stage = 'before_finalize (HTTPError)'
                    self.hooks.run('before_finalize')
                    cherrypy.serving.response.finalize()
            finally:
                # Runs whether or not the inner block raised.
                self.stage = 'on_end_resource'
                self.hooks.run('on_end_resource')
        except self.throws:
            raise
        except Exception:
            if self.throw_errors:
                raise
            self.handle_error()

    def _do_respond(self, path_info):
        """Run the request pipeline for path_info, one stage at a time.

        In order: process headers, dispatch to find the handler and config,
        create the RequestBody, apply config namespaces, then run the
        'on_start_resource' hooks, parse the query string, run the
        'before_request_body' hooks, process the body (if enabled), run the
        'before_handler' hooks, call the handler, run the 'before_finalize'
        hooks and finalize the response. self.stage is updated before each
        step.

        Raises cherrypy.NotFound if self.app is None.
        """
        response = cherrypy.serving.response

        if self.app is None:
            raise cherrypy.NotFound()

        # Start each request from a fresh copy of the class-level hooks.
        self.hooks = self.__class__.hooks.copy()
        self.toolmaps = {}

        # Get the 'Host' header, so we can HTTPRedirect properly.
        self.stage = 'process_headers'
        self.process_headers()

        self.stage = 'get_resource'
        self.get_resource(path_info)

        self.body = _cpreqbody.RequestBody(
            self.rfile, self.headers, request_params=self.params)

        self.namespaces(self.config)

        self.stage = 'on_start_resource'
        self.hooks.run('on_start_resource')

        # Parse the querystring
        self.stage = 'process_query_string'
        self.process_query_string()

        # Process the body
        if self.process_request_body:
            # Methods outside methods_with_bodies never get their body read.
            if self.method not in self.methods_with_bodies:
                self.process_request_body = False
        self.stage = 'before_request_body'
        self.hooks.run('before_request_body')
        # Re-checked on purpose: the flag is read again after the
        # 'before_request_body' hooks have run.
        if self.process_request_body:
            self.body.process()

        # Run the handler
        self.stage = 'before_handler'
        self.hooks.run('before_handler')
        if self.handler:
            self.stage = 'handler'
            response.body = self.handler()

        # Finalize
        self.stage = 'before_finalize'
        self.hooks.run('before_finalize')
        response.finalize()

    def process_query_string(self):
        """Parse the query string into Python structures. (Core)

        Merges the parsed parameters into self.params. Raises
        cherrypy.HTTPError(404) if the query string cannot be decoded with
        self.query_string_encoding.
        """
        try:
            p = httputil.parse_query_string(
                self.query_string, encoding=self.query_string_encoding)
        except UnicodeDecodeError:
            raise cherrypy.HTTPError(
                404, 'The given query string could not be processed. Query '
                'strings for this resource must be encoded with %r.' %
                self.query_string_encoding)

        # Python 2 only: keyword arguments must be byte strings (type 'str').
        if six.PY2:
            # Mutating p inside this loop relies on Python 2's items()
            # returning a list snapshot.
            for key, value in p.items():
                if isinstance(key, six.text_type):
                    del p[key]
                    p[key.encode(self.query_string_encoding)] = value
        self.params.update(p)

    def process_headers(self):
        """Parse HTTP header data into Python structures. (Core)

        Fills self.headers and self.cookie from self.header_list and sets
        self.base. Raises cherrypy.HTTPError(400) for an unparsable Cookie
        header, or for a missing Host header when self.protocol >= (1, 1).
        """
        # Process the headers into self.headers
        headers = self.headers
        for name, value in self.header_list:
            # Call title() now (and use dict.__method__(headers))
            # so title doesn't have to be called twice.
            name = name.title()
            value = value.strip()

            headers[name] = httputil.decode_TEXT_maybe(value)

            # Some clients, notably Konqueror, supply multiple
            # cookies on different lines with the same key. To
            # handle this case, store all cookies in self.cookie.
            if name == 'Cookie':
                try:
                    self.cookie.load(value)
                except CookieError as exc:
                    raise cherrypy.HTTPError(400, str(exc))

        if not dict.__contains__(headers, 'Host'):
            # All Internet-based HTTP/1.1 servers MUST respond with a 400
            # (Bad Request) status code to any HTTP/1.1 request message
            # which lacks a Host header field.
            if self.protocol >= (1, 1):
                msg = "HTTP/1.1 requires a 'Host' request header."
                raise cherrypy.HTTPError(400, msg)
        host = dict.get(headers, 'Host')
        if not host:
            # No (or empty) Host header: fall back to the local socket info.
            host = self.local.name or self.local.ip
        self.base = '%s://%s' % (self.scheme, host)

    def get_resource(self, path):
        """Call a dispatcher (which sets self.handler and .config). (Core)"""
        # First, see if there is a custom dispatch at this URI. Custom
        # dispatchers can only be specified in app.config, not in _cp_config
        # (since custom dispatchers may not even have an app.root).
        dispatch = self.app.find_config(
            path, 'request.dispatch', self.dispatch)

        # dispatch() should set self.handler and self.config
        dispatch(path)

    def handle_error(self):
        """Handle the last unanticipated exception. (Core)

        Runs the error-response hooks around self.error_response (when it is
        set) and finalizes the response. An HTTPRedirect raised along the way
        is turned into the response instead.
        """
        try:
            self.hooks.run('before_error_response')
            if self.error_response:
                self.error_response()
            self.hooks.run('after_error_response')
            cherrypy.serving.response.finalize()
        except cherrypy.HTTPRedirect:
            inst = sys.exc_info()[1]
            inst.set_response()
            cherrypy.serving.response.finalize()

    # ------------------------- Properties ------------------------- #

    def _get_body_params(self):
        """Emit a DeprecationWarning and return self.body.params."""
        warnings.warn(
            'body_params is deprecated in CherryPy 3.2, will be removed in '
            'CherryPy 3.3.',
            DeprecationWarning
        )
        return self.body.params
    body_params = property(_get_body_params,
                           doc="""
    If the request Content-Type is 'application/x-www-form-urlencoded' or
    multipart, this will be a dict of the params pulled from the entity
    body; that is, it will be the portion of request.params that come
    from the message body (sometimes called "POST params", although they
    can be sent with various HTTP method verbs). This value is set between
    the 'before_request_body' and 'before_handler' hooks (assuming that
    process_request_body is True).

    Deprecated in 3.2, will be removed for 3.3 in favor of
    :attr:`request.body.params<cherrypy._cprequest.RequestBody.params>`.""")
def request(self, sitename, path, method='GET', data=None):
    """Send one HTTP request to the infobase server and return the body.

    The request path is ``'/' + sitename + path``. When ``data`` is a dict,
    entries whose value is None are removed from it (in place), the rest
    is url-encoded, and the result is sent as the query string for GET
    requests or as a form-encoded body otherwise. If ``self.auth_token``
    is set it is sent as the ``infobase_auth_token`` cookie, and a token
    returned in ``Set-Cookie`` is stored via ``self.set_auth_token``.

    :param sitename: site name; becomes the first path segment.
    :param path: path below the site, concatenated as-is.
    :param method: HTTP method name.
    :param data: dict of parameters, a pre-encoded string, or None.
    :return: the response body when the status is 200; otherwise the
        result of ``self.handle_error(...)``.
    :raises ClientException: when the connection fails with socket.error.
    """
    path = '/' + sitename + path
    if isinstance(data, dict):
        # Collect the keys first: deleting while iterating the live key
        # view raises RuntimeError on Python 3.
        for key in [k for k, v in data.items() if v is None]:
            del data[key]

    if web.config.debug:
        web.ctx.infobase_req_count = 1 + web.ctx.get(
            'infobase_req_count', 0)
    # Captured before encoding so the debug line below shows the
    # caller-visible path and data.
    started_at = time.time()
    _path = path
    _data = data

    headers = {}
    if data:
        if isinstance(data, dict):
            data = dict(
                (web.safestr(k), web.safestr(v)) for k, v in data.items())
            data = urlencode(data)
            headers['Content-Type'] = 'application/x-www-form-urlencoded'
        if method == 'GET':
            path += '?' + data
            data = None

    stats.begin("infobase", path=path, method=method, data=data)
    conn = HTTPConnection(self.base_url)
    try:
        if self.auth_token:
            c = SimpleCookie()
            c['infobase_auth_token'] = quote(self.auth_token)
            headers['Cookie'] = c.output(header='').strip()

        # pass the remote ip to the infobase server
        headers['X-REMOTE-IP'] = web.ctx.get('ip')

        try:
            conn.request(method, path, data, headers=headers)
            response = conn.getresponse()
            stats.end()
        except socket.error:
            stats.end(error=True)
            logger.error("Unable to connect to infobase server",
                         exc_info=True)
            raise ClientException("503 Service Unavailable",
                                  "Unable to connect to infobase server")

        cookie = response.getheader('Set-Cookie')
        if cookie:
            c = SimpleCookie()
            c.load(cookie)
            if 'infobase_auth_token' in c:
                auth_token = c['infobase_auth_token'].value
                # The auth token will be in urlquoted form, unquote it
                # before use. Otherwise, it will be quoted twice when this
                # value is set as cookie.
                auth_token = auth_token and unquote(auth_token)
                self.set_auth_token(auth_token)

        if web.config.debug:
            finished_at = time.time()
            print("%.02f (%s):" % (round(finished_at - started_at, 2),
                                   web.ctx.infobase_req_count),
                  response.status, method, _path, _data, file=web.debug)

        if response.status == 200:
            return response.read()
        else:
            self.handle_error("%d %s" % (response.status, response.reason),
                              response.read())
    finally:
        # The body has been fully read (or an error is propagating) by the
        # time we get here, so release the socket instead of leaking it.
        conn.close()
def call_wsgi_app(wsgi_app, request, path_info):
    """
    Call the ``wsgi_app`` with ``request`` and return its response.

    :param wsgi_app: The WSGI application to be run.
    :type wsgi_app: callable
    :param request: The Django request.
    :type request: :class:`django_wsgi.handler.DjangoWSGIRequest`
    :param path_info: The ``PATH_INFO`` to be used by the WSGI application.
    :type path_info: :class:`basestring`
    :raises django_wsgi.exc.ApplicationCallError: If ``path_info`` is not the
        last portion of the ``PATH_INFO`` in ``request``.
    :return: The response from the WSGI application, turned into a Django
        response.
    :rtype: :class:`django.http.HttpResponse`

    Cookies set by the WSGI application are re-issued through
    ``HttpResponse.set_cookie``, keeping their expiry, path, domain,
    ``Max-Age``, ``Secure`` and ``HttpOnly`` attributes.

    """
    webob_request = request.webob
    new_request = webob_request.copy()

    # Moving the portion of the path consumed by the current view, from the
    # PATH_INFO to the SCRIPT_NAME:
    if not request.path_info.endswith(path_info):
        raise ApplicationCallError("Path %s is not the last portion of the "
                                   "PATH_INFO in the original request (%s)"
                                   % (path_info, request.path_info))
    # Slice by an explicit end index: ``[:-len(path_info)]`` would be
    # ``[:0]`` (i.e. '') for an empty path_info and lose the whole path.
    consumed_length = len(request.path_info) - len(path_info)
    consumed_path = request.path_info[:consumed_length]
    new_request.path_info = path_info
    new_request.script_name = webob_request.script_name + consumed_path

    # If the user has been authenticated in Django, log him in the WSGI app:
    if request.user.is_authenticated:
        new_request.remote_user = request.user.username

    # Cleaning the routing_args, if any. The application should have its own
    # arguments, without relying on any arguments from a parent application.
    # And the same for the WebOb ad-hoc attributes. The keys are removed from
    # the environ that is actually passed on, whether or not they are present.
    for inherited_key in ("wsgiorg.routing_args", "webob.adhoc_attrs"):
        new_request.environ.pop(inherited_key, None)

    # Calling the WSGI application and getting its response:
    (status_line, headers, body) = new_request.call_application(wsgi_app)

    status_code_raw = status_line.split(" ", 1)[0]
    status_code = int(status_code_raw)

    # Turning its response into a Django response:
    cookies = SimpleCookie()
    django_response = HttpResponse(body, status=status_code)
    for (header, value) in headers:
        if header.upper() == "SET-COOKIE":
            if PY2 and isinstance(value, text_type):
                # It can't be Unicode:
                value = value.encode("us-ascii")
            cookies.load(value)
        else:
            django_response[header] = value

    # Setting the cookies from Django:
    for (cookie_name, cookie) in cookies.items():
        cookie_attributes = {
            'key': cookie_name,
            'value': cookie.value,
            'expires': cookie['expires'],
            'path': cookie['path'],
            'domain': cookie['domain'],
            # Keep the security flags chosen by the WSGI application;
            # dropping them would silently weaken the cookie.
            'secure': bool(cookie['secure']),
            'httponly': bool(cookie['httponly']),
        }
        if cookie['max-age']:
            # Starting in Django 1.3 it performs arithmetic operations
            # with 'Max-Age'
            cookie_attributes['max_age'] = int(cookie['max-age'])

        django_response.set_cookie(**cookie_attributes)
    return django_response
def _generate_proxy(current_path, value):
    """Forward the current request to one of several backends.

    ``value`` has the form ``<prefix>:<cookie_name>:<routes>``, where
    ``<routes>`` is a comma-separated list of ``route=url`` pairs, e.g.::

        cookie_name = 'weblabsessionid'
        routes = {
            'route1' : 'http://localhost:10000/weblab/json/',
            'route2' : 'http://localhost:10001/weblab/json/',
        }

    The backend whose route name is a suffix of the client's
    ``cookie_name`` cookie is used; if none matches, one is picked at
    random. Only GET and POST are supported. A GET that fails with a
    connection error is retried (5 attempts in total, 0.5 s apart);
    POST failures are re-raised immediately.

    :param current_path: path appended to the chosen backend URL.
    :param value: routing specification described above.
    :return: a ``Response`` carrying the backend's body, status and headers.
    :raises Exception: if the request method is neither GET nor POST.
    """
    where = value.split(':', 1)[1]
    cookie_name, routes = where.split(':', 1)
    routes = {
        name: url
        for name, url in (
            route.strip().split('=', 1)
            for route in routes.split(',')
            if route.strip()
        )
    }

    current_cookie_value = request.cookies.get(cookie_name, '')

    chosen_url = None
    for route in routes:
        if current_cookie_value.endswith(route):
            chosen_url = routes[route]
            break

    if chosen_url is None:
        chosen_url = random.choice(list(routes.values()))

    headers = dict(request.headers)
    headers['X-Forwarded-For'] = request.remote_addr
    headers['X-Forwarded-Host'] = request.host
    headers.pop('Host', None)
    headers.pop('host', None)

    kwargs = dict(headers=headers, cookies=dict(request.cookies),
                  allow_redirects=False)

    if request.method == 'GET':
        method = requests.get
    elif request.method == 'POST':
        kwargs['data'] = request.data

        if request.files:
            kwargs['files'] = {}
            for f, f_contents in six.iteritems(request.files):
                kwargs['files'][f] = [f_contents.filename,
                                      f_contents.stream,
                                      f_contents.content_type,
                                      f_contents.headers]

        if request.form:
            # Drop the incoming Content-Type and send the form as ``data``.
            headers.pop('Content-Type', None)
            kwargs['data'] = request.form

        method = requests.post
    else:
        raise Exception("Method not supported")

    MAX_RETRIES = 5
    retry = 0
    full_url = chosen_url + current_path
    if request.args:
        # Percent-encode keys as well as values so that reserved characters
        # in a parameter name cannot corrupt the forwarded query string.
        # NOTE(review): items() presumably yields one value per key; repeated
        # parameters may be collapsed -- confirm against the request class.
        full_url += '?' + '&'.join([
            '%s=%s' % (requests.utils.quote(key, ''),
                       requests.utils.quote(value, ''))
            for key, value in request.args.items()
        ])

    while True:
        try:
            req = method(full_url, **kwargs)
            break
        except requests.ConnectionError:
            # Only GET is retried; re-sending a POST could repeat its
            # side effects.
            if request.method != 'GET':
                raise
            retry += 1
            if retry >= MAX_RETRIES:
                raise
            time.sleep(0.5)

    headers = dict(req.headers)
    # NOTE(review): ``headers`` is a plain dict here, so this pop is
    # case-sensitive and presumably leaves a 'Set-Cookie' key in place; the
    # loop below then skips cookies already present in the headers -- confirm.
    headers.pop('set-cookie', None)
    response_kwargs = {
        'headers': headers,
        'status': req.status_code,
    }
    if 'content-type' in req.headers:
        response_kwargs['content_type'] = req.headers['content-type']

    response = Response(req.content, **response_kwargs)

    existing_cookies = SimpleCookie()
    for header in response.headers:
        if header[0].lower() == 'set-cookie':
            # Bind before the try block so the error report below can
            # always print it, even if the encoding step fails.
            cookie_header = header[1]
            try:
                if six.PY2:
                    cookie_header = cookie_header.encode('utf8')
                existing_cookies.load(cookie_header)
            except Exception:
                # Best effort: a malformed cookie header is reported and
                # skipped rather than failing the whole proxied response.
                print("Error processing cookie header: {}".format(cookie_header))
                import traceback
                traceback.print_exc()

    for c in req.cookies:
        if c.name not in existing_cookies:
            response.set_cookie(c.name, c.value, path=c.path,
                                expires=c.expires, secure=c.secure)

    return response
class HTTP(object):
    """HTTP client bound to a script.

    Keeps session headers and cookies between consecutive requests, runs the
    profile's session setup/shutdown hooks and applies the profile's request
    middleware to every GET and POST.
    """

    HTTPError = HTTPError

    def __init__(self, script):
        """
        :param script: Owning script; may be empty (for testing purposes), in
            which case no logger and no middleware are set up
        """
        self.script = script
        if script:  # For testing purposes
            self.logger = PrefixLoggerAdapter(script.logger, "http")
        self.headers = {}
        self.cookies = None
        self.session_started = False
        self.request_id = 1
        self.session_id = None
        self.request_middleware = None
        if self.script:  # For testing purposes
            self.setup_middleware()

    def get_url(self, path):
        """
        Build full URL from the script credentials

        :param path: URI
        :return: "<http_protocol>://<address>[:<http_port>]<path>",
            protocol defaults to "http"
        """
        address = self.script.credentials["address"]
        port = self.script.credentials.get("http_port")
        if port:
            address += ":%s" % port
        proto = self.script.credentials.get("http_protocol", "http")
        return "%s://%s%s" % (proto, address, path)

    def get(self, path, headers=None, cached=False, json=False, eof_mark=None, use_basic=False):
        """
        Perform HTTP GET request

        :param path: URI
        :param headers: Dict of additional headers
        :param cached: Cache result
        :param json: Decode json if set to True
        :param eof_mark: Waiting eof_mark in stream for end session
            (perhaps device return length 0)
        :param use_basic: Use basic authentication
        :return: Response body, decoded when `json` is set
        :raises HTTPError: On non-2xx response code or JSON decoding failure
        """
        self.ensure_session()
        self.request_id += 1
        self.logger.debug("GET %s", path)
        if cached:
            cache_key = "get_%s" % path
            r = self.script.root.http_cache.get(cache_key)
            if r is not None:
                self.logger.debug("Use cached result")
                return r
        user, password = self._get_basic_credentials(use_basic)
        # Apply GET middleware
        url = self.get_url(path)
        hdr = self._get_effective_headers(headers)
        if self.request_middleware:
            for mw in self.request_middleware:
                url, _, hdr = mw.process_get(url, "", hdr)
        code, headers, result = fetch_sync(
            url,
            headers=hdr,
            request_timeout=60,
            follow_redirects=True,
            allow_proxy=False,
            validate_cert=False,
            eof_mark=eof_mark,
            user=user,
            password=password,
        )
        result = self._process_response(code, headers, result, json)
        if cached:
            self.script.root.http_cache[cache_key] = result
        return result

    def post(self, path, data, headers=None, cached=False, json=False, eof_mark=None, use_basic=False):
        """
        Perform HTTP POST request

        :param path: URI
        :param data: Request body
        :param headers: Dict of additional headers
        :param cached: Cache result
        :param json: Decode json if set to True
        :param eof_mark: Waiting eof_mark in stream for end session
            (perhaps device return length 0)
        :param use_basic: Use basic authentication
        :return: Response body, decoded when `json` is set
        :raises HTTPError: On non-2xx response code or JSON decoding failure
        """
        self.ensure_session()
        self.request_id += 1
        self.logger.debug("POST %s %s", path, data)
        if cached:
            cache_key = "post_%s" % path
            r = self.script.root.http_cache.get(cache_key)
            if r is not None:
                self.logger.debug("Use cached result")
                return r
        user, password = self._get_basic_credentials(use_basic)
        # Apply POST middleware
        url = self.get_url(path)
        hdr = self._get_effective_headers(headers)
        if self.request_middleware:
            for mw in self.request_middleware:
                url, data, hdr = mw.process_post(url, data, hdr)
        code, headers, result = fetch_sync(
            url,
            method="POST",
            body=data,
            headers=hdr,
            request_timeout=60,
            follow_redirects=True,
            allow_proxy=False,
            validate_cert=False,
            eof_mark=eof_mark,
            user=user,
            password=password,
        )
        # Decoded JSON goes through the same logging and caching path as a
        # raw body, exactly as in get().
        result = self._process_response(code, headers, result, json)
        if cached:
            self.script.root.http_cache[cache_key] = result
        return result

    def close(self):
        """
        Shut the HTTP session down if one has been started
        """
        if self.session_started:
            self.shutdown_session()

    def _get_basic_credentials(self, use_basic):
        """
        Get credentials for basic authentication

        :param use_basic: Use basic authentication
        :return: (user, password) from the script credentials when
            `use_basic` is set, (None, None) otherwise
        """
        if not use_basic:
            return None, None
        return (
            self.script.credentials.get("user"),
            self.script.credentials.get("password"),
        )

    def _process_response(self, code, headers, result, decode_json):
        """
        Common response handling for get() and post(): check status code,
        store cookies, optionally decode JSON and log the result

        :param code: HTTP response code
        :param headers: Response headers
        :param result: Response body
        :param decode_json: Decode body as JSON if set to True
        :return: Response body, decoded when `decode_json` is set
        :raises HTTPError: On non-2xx response code or JSON decoding failure
        """
        if not 200 <= code <= 299:
            raise HTTPError(msg="HTTP Error (%s)" % result[:256], code=code)
        self._process_cookies(headers)
        if decode_json:
            try:
                result = ujson.loads(result)
            except ValueError as e:
                raise HTTPError(msg="Failed to decode JSON: %s" % e)
        self.logger.debug("Result: %r", result)
        return result

    def _process_cookies(self, headers):
        """
        Process and store cookies from response headers

        :param headers: Response headers
        :return:
        """
        cdata = headers.get("Set-Cookie")
        if not cdata:
            return
        if not self.cookies:
            self.cookies = SimpleCookie()
        self.cookies.load(cdata)

    def get_cookie(self, name):
        """
        Get cookie by name

        :param name: Cookie name
        :return: Morsel object or None
        """
        if not self.cookies:
            return None
        return self.cookies.get(name)

    def _get_effective_headers(self, headers):
        """
        Append session headers when necessary. Apply effective cookies.
        The dict passed by the caller is never modified.

        :param headers: Dict of per-request headers or None
        :return: `headers` itself when there are neither session headers
            nor cookies, otherwise a new dict in which session headers
            override per-request ones and "Cookie" carries stored cookies
        """
        if not self.headers and not self.cookies:
            return headers
        effective = dict(headers) if headers else {}
        effective.update(self.headers)
        if self.cookies:
            # A Cookie request header is a single line of "name=value" pairs
            # separated by "; ", without attributes such as Path or Expires.
            effective["Cookie"] = "; ".join(
                "%s=%s" % (morsel.key, morsel.coded_value)
                for morsel in self.cookies.values()
            )
        return effective

    def set_header(self, name, value):
        """
        Set HTTP header to be set with all following requests

        :param name: Header name
        :param value: Header value, stored as str
        :return:
        """
        self.logger.debug("Set header: %s = %s", name, value)
        self.headers[name] = str(value)

    def set_session_id(self, session_id):
        """
        Set session_id to be reused by middleware

        :param session_id: Session id, None resets it
        :return: None
        """
        self.session_id = session_id

    def ensure_session(self):
        """
        Run session setup once, before the first request
        """
        if not self.session_started:
            self.session_started = True
            self.setup_session()

    def setup_session(self):
        """
        Call profile's setup_http_session hook, if any
        """
        if self.script.profile.setup_http_session:
            self.logger.debug("Setup http session")
            self.script.profile.setup_http_session(self.script)

    def shutdown_session(self):
        """
        Call profile's shutdown_http_session hook, if any
        """
        if self.script.profile.shutdown_http_session:
            self.logger.debug("Shutdown http session")
            self.script.profile.shutdown_http_session(self.script)

    def setup_middleware(self):
        """
        Instantiate request middleware configured by the profile.

        Each entry is either a name or a (name, config) tuple. Names
        containing "." are resolved via get_handler, others via
        loader.get_class. request_middleware stays None when the profile
        configures nothing.
        """
        mw_list = self.script.profile.get_http_request_middleware(self.script)
        if not mw_list:
            return
        self.request_middleware = []
        for mw_cfg in mw_list:
            if isinstance(mw_cfg, tuple):
                name, cfg = mw_cfg
            else:
                name, cfg = mw_cfg, {}
            if "." in name:
                # Handler
                mw_cls = get_handler(name)
                assert mw_cls
                assert issubclass(mw_cls, BaseMiddleware)
            else:
                # Middleware name
                mw_cls = loader.get_class(name)
            self.request_middleware.append(mw_cls(self, **cfg))