def _parse_form_urlencoded(self): # PERF(kgriffs): Technically, we should spend a few more # cycles and parse the content type for real, but # this heuristic will work virtually all the time. if (self.content_type and 'application/x-www-form-urlencoded' in self.content_type): # NOTE(kgriffs): This assumes self.stream has been patched # above in the case of wsgiref, so that self.content_length # is not needed. Normally we just avoid accessing # self.content_length, because it is a little expensive # to call. We could cache self.content_length, but the # overhead to do that won't usually be helpful, since # content length will only ever be read once per # request in most cases. body = self.stream.read() # NOTE(kgriffs): According to http://goo.gl/6rlcux the # body should be US-ASCII. Enforcing this also helps # catch malicious input. try: body = body.decode('ascii') except UnicodeDecodeError: body = None self.log_error('Non-ASCII characters found in form body ' 'with Content-Type of ' 'application/x-www-form-urlencoded. Body ' 'will be ignored.') if body: extra_params = uri.parse_query_string(uri.decode(body)) self._params.update(extra_params)
def _parse_form_urlencoded(self): # NOTE(kgriffs): This assumes self.stream has been patched # above in the case of wsgiref, so that self.content_length # is not needed. Normally we just avoid accessing # self.content_length, because it is a little expensive # to call. We could cache self.content_length, but the # overhead to do that won't usually be helpful, since # content length will only ever be read once per # request in most cases. body = self.stream.read() # NOTE(kgriffs): According to http://goo.gl/6rlcux the # body should be US-ASCII. Enforcing this also helps # catch malicious input. try: body = body.decode('ascii') except UnicodeDecodeError: body = None self.log_error('Non-ASCII characters found in form body ' 'with Content-Type of ' 'application/x-www-form-urlencoded. Body ' 'will be ignored.') if body: extra_params = uri.parse_query_string( uri.decode(body), keep_blank_qs_values=self.options.keep_blank_qs_values, ) self._params.update(extra_params)
def test_prop_uri_decode_models_stdlib_unquote_plus(self):
    """Check ``uri.decode`` against the stdlib ``unquote_plus``.

    Every sample in ``self.uris`` is first passed through
    ``uri.encode_value``; both decoders must agree on the encoded form.
    """
    reference_decode = six.moves.urllib.parse.unquote_plus

    for sample in self.uris:
        encoded = uri.encode_value(sample)
        self.assertEqual(reference_decode(encoded), uri.decode(encoded))
def test_prop_uri_decode_models_stdlib_unquote_plus(self):
    """Check that ``uri.decode`` matches ``unquote_plus`` on encoded samples.

    Each entry of ``self.uris`` is encoded with ``uri.encode_value`` and
    then decoded by both implementations, which must give equal results.
    """
    # A generator keeps encoding and checking interleaved, one sample at
    # a time.
    encoded_samples = (uri.encode_value(sample) for sample in self.uris)

    for encoded in encoded_samples:
        assert unquote_plus(encoded) == uri.decode(encoded)
def test_uri_encode_value(self):
    """Check ``uri.encode_value`` on plain, reserved and non-ASCII input.

    The table pairs each raw value with the percent-encoded form the
    encoder is expected to produce. A final round-trip check confirms
    that an already-escaped sequence survives encode followed by decode.
    """
    # NOTE: The original listed the 'abcd' case twice verbatim; the
    # redundant copy has been dropped.
    expectations = (
        ('abcd', 'abcd'),
        ('ab cd', 'ab%20cd'),
        ('\u00e7', '%C3%A7'),
        ('\u00e7\u20ac', '%C3%A7%E2%82%AC'),
        ('ab/cd', 'ab%2Fcd'),
        ('ab+cd=42,9', 'ab%2Bcd%3D42%2C9'),
        # NOTE(minesja): Addresses #1872
        ('%26', '%2526'),
    )

    for raw, encoded in expectations:
        assert uri.encode_value(raw) == encoded

    # NOTE(minesja): Addresses #1872 (the percent sign itself must be
    # escaped so that decoding restores the original text).
    assert uri.decode(uri.encode_value('%26')) == '%26'
def test_uri_decode(self):
    """Check ``uri.decode`` on plain, escaped and UTF-8 encoded input."""
    # Each pair is (input handed to uri.decode, expected decoded text).
    cases = [
        ("abcd", "abcd"),
        (u"abcd", u"abcd"),
        (u"ab%20cd", u"ab cd"),
        ("This thing is %C3%A7", u"This thing is \u00e7"),
        ("This thing is %C3%A7%E2%82%AC", u"This thing is \u00e7\u20ac"),
        ("ab%2Fcd", "ab/cd"),
        ("http://example.com?x=ab%2Bcd%3D42%2C9",
         "http://example.com?x=ab+cd=42,9"),
    ]

    for encoded, decoded in cases:
        self.assertEqual(uri.decode(encoded), decoded)
def test_uri_decode(self):
    """Check ``uri.decode`` on unescaped, escaped and UTF-8 input."""
    decode = uri.decode
    check = self.assertEqual

    # Text without escapes comes back unchanged.
    check(decode('abcd'), 'abcd')
    check(decode(u'abcd'), u'abcd')

    # Percent-escapes, including a multi-byte UTF-8 sequence.
    check(decode(u'ab%20cd'), u'ab cd')
    check(decode('%C3%A7'), u'\u00e7')
    check(decode('ab%2Fcd'), 'ab/cd')

    # Escapes embedded in a full URL.
    check(decode('http://example.com?x=ab%2Bcd%3D42%2C9'),
          'http://example.com?x=ab+cd=42,9')
def test_uri_encode(self):
    """Check ``uri.encode`` on full URLs and on a bare percent escape."""
    already_clean = 'http://example.com/v1/fizbit/messages?limit=3&echo=true'

    # Each pair is (input handed to uri.encode, expected result).
    cases = [
        # A URL with nothing to escape is returned as-is.
        (already_clean, already_clean),
        ('http://example.com/v1/fiz bit/messages',
         'http://example.com/v1/fiz%20bit/messages'),
        ('http://example.com/v1/fizbit/messages?limit=3&e\u00e7ho=true',
         'http://example.com/v1/fizbit/messages?limit=3&e%C3%A7ho=true'),
        # NOTE(minesja): Addresses #1872
        ('%26', '%2526'),
    ]

    for original, encoded in cases:
        assert uri.encode(original) == encoded

    # NOTE(minesja): Addresses #1872 (round trip restores the input).
    assert uri.decode(uri.encode('%26')) == '%26'
def test_uri_decode(self):
    """Check ``uri.decode`` on plain, escaped and UTF-8 encoded input."""
    # Each pair is (input handed to uri.decode, expected decoded text).
    cases = (
        ('abcd', 'abcd'),
        (u'abcd', u'abcd'),
        (u'ab%20cd', u'ab cd'),
        ('This thing is %C3%A7', u'This thing is \u00e7'),
        ('This thing is %C3%A7%E2%82%AC', u'This thing is \u00e7\u20ac'),
        ('ab%2Fcd', 'ab/cd'),
        ('http://example.com?x=ab%2Bcd%3D42%2C9',
         'http://example.com?x=ab+cd=42,9'),
    )

    for encoded, decoded in cases:
        assert uri.decode(encoded) == decoded
def test_uri_decode(self):
    """Check ``uri.decode`` on unescaped, escaped and UTF-8 input."""
    decode = uri.decode

    # Text without escapes comes back unchanged.
    assert decode('abcd') == 'abcd'
    assert decode(u'abcd') == u'abcd'

    # Single-byte and multi-byte percent-escapes.
    assert decode(u'ab%20cd') == u'ab cd'

    one_char = decode('This thing is %C3%A7')
    assert one_char == u'This thing is \u00e7'

    two_chars = decode('This thing is %C3%A7%E2%82%AC')
    assert two_chars == u'This thing is \u00e7\u20ac'

    assert decode('ab%2Fcd') == 'ab/cd'

    # Escapes embedded in a full URL.
    full_url = decode('http://example.com?x=ab%2Bcd%3D42%2C9')
    assert full_url == 'http://example.com?x=ab+cd=42,9'
def test_uri_decode(self):
    """Check ``uri.decode`` on plain, escaped and UTF-8 encoded input."""
    # Maps each input handed to uri.decode to the expected decoded text.
    # A list of pairs (not a dict) keeps the checks in a fixed order and
    # keeps the 'abcd' / u'abcd' inputs as separate cases.
    expectations = [
        ('abcd', 'abcd'),
        (u'abcd', u'abcd'),
        (u'ab%20cd', u'ab cd'),
        ('This thing is %C3%A7', u'This thing is \u00e7'),
        ('This thing is %C3%A7%E2%82%AC', u'This thing is \u00e7\u20ac'),
        ('ab%2Fcd', 'ab/cd'),
        ('http://example.com?x=ab%2Bcd%3D42%2C9',
         'http://example.com?x=ab+cd=42,9'),
    ]

    for source, wanted in expectations:
        self.assertEqual(uri.decode(source), wanted)
def test_uri_decode_unquote_plus(self):
    """Check how ``uri.decode`` treats '+' under ``unquote_plus``."""
    plus_path = '/disk/lost+found/fd0'
    escaped_url = 'http://example.com?x=ab%2Bcd%3D42%2C9'
    unescaped_url = 'http://example.com?x=ab+cd=42,9'

    # A literal '+' becomes a space by default and when requested...
    assert uri.decode(plus_path) == '/disk/lost found/fd0'
    assert uri.decode(plus_path, unquote_plus=True) == '/disk/lost found/fd0'
    # ...and is left alone when unquote_plus is disabled.
    assert uri.decode(plus_path, unquote_plus=False) == '/disk/lost+found/fd0'

    # An escaped plus (%2B) decodes to '+' regardless of the flag.
    assert uri.decode(escaped_url) == unescaped_url
    assert uri.decode(escaped_url, unquote_plus=True) == unescaped_url
    assert uri.decode(escaped_url, unquote_plus=False) == unescaped_url
def test_uri_decode_unquote_plus(self, decode_approach):
    """Check how ``uri.decode`` treats '+' under ``unquote_plus``.

    ``decode_approach`` is not used in the body; it is requested only so
    that whatever provides it is applied to this test.
    """
    # Each row is (input, keyword arguments for uri.decode, expected).
    scenarios = (
        ('/disk/lost+found/fd0', {}, '/disk/lost found/fd0'),
        ('/disk/lost+found/fd0', {'unquote_plus': True},
         '/disk/lost found/fd0'),
        ('/disk/lost+found/fd0', {'unquote_plus': False},
         '/disk/lost+found/fd0'),
        ('http://example.com?x=ab%2Bcd%3D42%2C9', {},
         'http://example.com?x=ab+cd=42,9'),
        ('http://example.com?x=ab%2Bcd%3D42%2C9', {'unquote_plus': True},
         'http://example.com?x=ab+cd=42,9'),
        ('http://example.com?x=ab%2Bcd%3D42%2C9', {'unquote_plus': False},
         'http://example.com?x=ab+cd=42,9'),
    )

    for encoded, options, expected in scenarios:
        assert uri.decode(encoded, **options) == expected
def create_environ(path='/', query_string='', protocol='HTTP/1.1',
                   scheme='http', host=DEFAULT_HOST, port=None,
                   headers=None, app='', body='', method='GET',
                   wsgierrors=None, file_wrapper=None):
    """Build a mock PEP-3333 environ ``dict`` for simulating WSGI requests.

    Keyword Args:
        path (str): The path for the request (default '/')
        query_string (str): The query string to simulate, without a
            leading '?' (default '')
        protocol (str): The HTTP protocol to simulate
            (default 'HTTP/1.1'). If set to 'HTTP/1.0', the Host header
            will not be added to the environment.
        scheme (str): URL scheme, either 'http' or 'https' (default 'http')
        host(str): Hostname for the request (default 'falconframework.org')
        port (str): The TCP port to simulate. Defaults to
            the standard port used by the given scheme (i.e., 80 for 'http'
            and 443 for 'https').
        headers (dict): Headers as a ``dict`` or an iterable yielding
            (*key*, *value*) ``tuple``'s
        app (str): Value for the ``SCRIPT_NAME`` environ variable, described in
            PEP-333: 'The initial portion of the request URL's "path" that
            corresponds to the application object, so that the application
            knows its virtual "location". This may be an empty string, if the
            application corresponds to the "root" of the server.' (default '')
        body (str): The body of the request (default ''). Accepts both byte
            strings and Unicode strings. Unicode strings are encoded as UTF-8
            in the request.
        method (str): The HTTP method to use (default 'GET')
        wsgierrors (io): The stream to use as *wsgierrors*
            (default ``sys.stderr``)
        file_wrapper: Callable that returns an iterable, to be used as
            the value for *wsgi.file_wrapper* in the environ.

    Returns:
        dict: The populated environ.

    Raises:
        ValueError: If ``query_string`` starts with '?'.
    """
    if query_string and query_string.startswith('?'):
        raise ValueError("query_string should not start with '?'")

    if isinstance(body, six.text_type):
        body = body.encode('utf-8')
    body_stream = io.BytesIO(body)

    # NOTE(kgriffs): wsgiref, gunicorn, and uWSGI all unescape
    # the paths before setting PATH_INFO
    path = uri.decode(path)

    if six.PY3:
        # NOTE(kgriffs): The decoded path may contain UTF-8 characters.
        # But according to the WSGI spec, no strings can contain chars
        # outside ISO-8859-1. Therefore, to reconcile the URI
        # encoding standard that allows UTF-8 with the WSGI spec
        # that does not, WSGI servers tunnel the string via
        # ISO-8859-1. falcon.testing.create_environ() mimics this
        # behavior, e.g.:
        #
        #   tunnelled_path = path.encode('utf-8').decode('iso-8859-1')
        #
        # falcon.Request does the following to reverse the process:
        #
        #   path = tunnelled_path.encode('iso-8859-1').decode('utf-8', 'replace')
        #
        path = path.encode('utf-8').decode('iso-8859-1')

    if six.PY2 and isinstance(path, six.text_type):
        path = path.encode('utf-8')

    scheme = scheme.lower()
    if port is not None:
        port = str(port)
    elif scheme == 'http':
        port = '80'
    else:
        port = '443'

    env = {
        'SERVER_PROTOCOL': protocol,
        'SERVER_SOFTWARE': 'gunicorn/0.17.0',
        'SCRIPT_NAME': app,
        'REQUEST_METHOD': method,
        'PATH_INFO': path,
        'QUERY_STRING': query_string,
        'HTTP_USER_AGENT': 'curl/7.24.0 (x86_64-apple-darwin12.0)',
        'REMOTE_PORT': '65133',
        'RAW_URI': '/',
        'REMOTE_ADDR': '127.0.0.1',
        'SERVER_NAME': host,
        'SERVER_PORT': port,

        'wsgi.version': (1, 0),
        'wsgi.url_scheme': scheme,
        'wsgi.input': body_stream,
        'wsgi.errors': wsgierrors or sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': True,
        'wsgi.run_once': False
    }

    if file_wrapper is not None:
        env['wsgi.file_wrapper'] = file_wrapper

    if protocol != 'HTTP/1.0':
        # The port is appended to the Host header only when it differs
        # from the scheme's standard one; anything other than 'https'
        # is compared against port 80.
        standard_port = '443' if scheme == 'https' else '80'
        if port == standard_port:
            env['HTTP_HOST'] = host
        else:
            env['HTTP_HOST'] = host + ':' + port

    # Seeking to the end reports the body size; rewind afterwards so the
    # stream is handed out positioned at the start.
    content_length = body_stream.seek(0, 2)
    body_stream.seek(0)

    if content_length != 0:
        env['CONTENT_LENGTH'] = str(content_length)

    if headers is not None:
        _add_headers_to_environ(env, headers)

    return env
def __init__(self, env):
    """Initialize attributes based on a WSGI environment dict.

    Note:
        Request is not meant to be instantiated directly by responders.

    Args:
        env: A WSGI environment dict passed in from the server. See
            also the PEP-3333 spec.
    """
    self.env = env

    self._wsgierrors = env['wsgi.errors']
    self.stream = env['wsgi.input']
    self.method = env['REQUEST_METHOD']

    # Normalize path: an empty path becomes '/', and a trailing slash is
    # dropped from anything longer than a single character.
    raw_path = env['PATH_INFO']
    if not raw_path:
        self.path = '/'
    elif len(raw_path) != 1 and raw_path.endswith('/'):
        self.path = raw_path[:-1]
    else:
        self.path = raw_path

    # QUERY_STRING isn't required to be in env, so let's check
    # PERF: if...in is faster than using env.get(...)
    raw_query = env['QUERY_STRING'] if 'QUERY_STRING' in env else None
    if raw_query:
        # TODO(kgriffs): Should this escape individual values instead
        # of the entire string? The way it is now, this:
        #
        #   x=ab%2Bcd%3D42%2C9
        #
        # becomes this:
        #
        #   x=ab+cd=42,9
        #
        self.query_string = uri.decode(raw_query)
    else:
        self.query_string = six.text_type()

    # PERF: Don't parse it if we don't have to!
    self._params = (uri.parse_query_string(self.query_string)
                    if self.query_string else {})

    helpers.normalize_headers(env)
    self._cached_headers = {}

    self._cached_uri = None
    self._cached_relative_uri = None

    # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
    # normalizing semantics between, e.g., gunicorn and wsgiref.
    if isinstance(self.stream, NativeStream):  # pragma: nocover
        # NOTE(kgriffs): coverage can't detect that this *is* actually
        # covered since the test that does so uses multiprocessing.
        self.stream = helpers.Body(self.stream, self.content_length)
def test_uri_decode_bad_unicode(self, encoded, expected, decode_approach):
    """Check that ``uri.decode(encoded)`` yields ``expected``.

    ``decode_approach`` is not used in the body; it is requested only so
    that whatever provides it is applied to this test.
    """
    decoded = uri.decode(encoded)
    assert decoded == expected
def __init__(self, env):
    """Initialize request attributes from a WSGI environ ``dict``.

    Besides copying the basic request data out of ``env``, this parses
    the query string and, for URL-encoded form submissions, merges the
    form fields from the request body into the parsed parameters.

    Args:
        env (dict): WSGI environ for the request. ``wsgi.errors``,
            ``wsgi.input``, ``REQUEST_METHOD`` and ``PATH_INFO`` are
            read unconditionally; ``QUERY_STRING`` is optional.
    """
    self.env = env

    if self.context_type is None:
        # Literal syntax is more efficient than using dict()
        self.context = {}
    else:
        # pylint will detect this as not-callable because it only sees the
        # declaration of None, not whatever type a subclass may have set.
        self.context = self.context_type()  # pylint: disable=not-callable

    self._wsgierrors = env['wsgi.errors']
    self.stream = env['wsgi.input']
    self.method = env['REQUEST_METHOD']

    # Normalize path: an empty path becomes '/', and a trailing slash is
    # dropped from anything longer than a single character.
    path = env['PATH_INFO']
    if path:
        if len(path) != 1 and path.endswith('/'):
            self.path = path[:-1]
        else:
            self.path = path
    else:
        self.path = '/'

    # QUERY_STRING isn't required to be in env, so let's check
    # PERF: if...in is faster than using env.get(...)
    if 'QUERY_STRING' in env and env['QUERY_STRING']:

        # TODO(kgriffs): Should this escape individual values instead
        # of the entire string? The way it is now, this:
        #
        #   x=ab%2Bcd%3D42%2C9
        #
        # becomes this:
        #
        #   x=ab+cd=42,9
        #
        self.query_string = uri.decode(env['QUERY_STRING'])

    else:
        self.query_string = six.text_type()

    # PERF: Don't parse it if we don't have to!
    if self.query_string:
        self._params = uri.parse_query_string(self.query_string)
    else:
        self._params = {}

    helpers.normalize_headers(env)
    self._cached_headers = {}

    self._cached_uri = None
    self._cached_relative_uri = None

    self.content_type = self._get_header_by_wsgi_name('HTTP_CONTENT_TYPE')

    # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
    # normalizing semantics between, e.g., gunicorn and wsgiref.
    if isinstance(self.stream, NativeStream):  # pragma: nocover
        # NOTE(kgriffs): coverage can't detect that this *is* actually
        # covered since the test that does so uses multiprocessing.
        self.stream = helpers.Body(self.stream, self.content_length)

    # PERF(kgriffs): Technically, we should spend a few more
    # cycles and parse the content type for real, but
    # this heuristic will work virtually all the time.
    if (self.content_type and
            'application/x-www-form-urlencoded' in self.content_type):

        # NOTE(kgriffs): This assumes self.stream has been patched
        # above in the case of wsgiref, so that self.content_length
        # is not needed. Normally we just avoid accessing
        # self.content_length, because it is a little expensive
        # to call. We could cache self.content_length, but the
        # overhead to do that won't usually be helpful, since
        # content length will only ever be read once per
        # request in most cases.
        body = self.stream.read()

        # A client can send arbitrary bytes here. Decoding without a
        # guard would raise UnicodeDecodeError out of the constructor
        # for any non-ASCII body, so log the problem and ignore the
        # body instead.
        # NOTE(review): relies on self.log_error being defined on this
        # class, as the _parse_form_urlencoded variants assume.
        try:
            body = body.decode('ascii')
        except UnicodeDecodeError:
            body = None
            self.log_error('Non-ASCII characters found in form body '
                           'with Content-Type of '
                           'application/x-www-form-urlencoded. Body '
                           'will be ignored.')

        # Nothing to merge for an empty or rejected body.
        if body:
            extra_params = uri.parse_query_string(uri.decode(body))
            self._params.update(extra_params)
def create_environ(path='/', query_string='', protocol='HTTP/1.1',
                   scheme='http', host=DEFAULT_HOST, port=None,
                   headers=None, app='', body='', method='GET',
                   wsgierrors=None, file_wrapper=None):
    """Creates a mock PEP-3333 environ ``dict`` for simulating WSGI requests.

    Args:
        path (str, optional): The path for the request (default '/')
        query_string (str, optional): The query string to simulate, without a
            leading '?' (default '')
        protocol (str, optional): The HTTP protocol to simulate
            (default 'HTTP/1.1'). If set to 'HTTP/1.0', the Host header
            will not be added to the environment.
        scheme (str): URL scheme, either 'http' or 'https' (default 'http')
        host(str): Hostname for the request (default 'falconframework.org')
        port (str or int, optional): The TCP port to simulate. Defaults to
            the standard port used by the given scheme (i.e., 80 for 'http'
            and 443 for 'https').
        headers (dict or list, optional): Headers as a ``dict`` or an
            iterable collection of (*key*, *value*) ``tuple``'s
        app (str): Value for the ``SCRIPT_NAME`` environ variable, described in
            PEP-333: 'The initial portion of the request URL's "path" that
            corresponds to the application object, so that the application
            knows its virtual "location". This may be an empty string, if the
            application corresponds to the "root" of the server.' (default '')
        body (str or unicode): The body of the request (default ''). Unicode
            text is encoded as UTF-8.
        method (str): The HTTP method to use (default 'GET')
        wsgierrors (io): The stream to use as *wsgierrors*
            (default ``sys.stderr``)
        file_wrapper: Callable that returns an iterable, to be used as
            the value for *wsgi.file_wrapper* in the environ.

    Returns:
        dict: The populated environ. ``CONTENT_LENGTH`` is included, as a
        string, only when the body is non-empty.
    """
    body = io.BytesIO(body.encode('utf-8')
                      if isinstance(body, six.text_type) else body)

    # NOTE(kgriffs): wsgiref, gunicorn, and uWSGI all unescape
    # the paths before setting PATH_INFO
    path = uri.decode(path)

    # NOTE(kgriffs): nocover since this branch will never be
    # taken in Python3. However, the branch is tested under Py2,
    # in test_utils.TestFalconTesting.test_unicode_path_in_create_environ
    if six.PY2 and isinstance(path, six.text_type):  # pragma: nocover
        path = path.encode('utf-8')

    scheme = scheme.lower()
    if port is None:
        port = '80' if scheme == 'http' else '443'
    else:
        port = str(port)

    env = {
        'SERVER_PROTOCOL': protocol,
        'SERVER_SOFTWARE': 'gunicorn/0.17.0',
        'SCRIPT_NAME': app,
        'REQUEST_METHOD': method,
        'PATH_INFO': path,
        'QUERY_STRING': query_string,
        'HTTP_USER_AGENT': 'curl/7.24.0 (x86_64-apple-darwin12.0)',
        'REMOTE_PORT': '65133',
        'RAW_URI': '/',
        'REMOTE_ADDR': '127.0.0.1',
        'SERVER_NAME': host,
        'SERVER_PORT': port,

        # PEP 3333 lists wsgi.version among the variables an environ
        # must contain.
        'wsgi.version': (1, 0),
        'wsgi.url_scheme': scheme,
        'wsgi.input': body,
        'wsgi.errors': wsgierrors or sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': True,
        'wsgi.run_once': False
    }

    if file_wrapper is not None:
        env['wsgi.file_wrapper'] = file_wrapper

    if protocol != 'HTTP/1.0':
        # The port is appended to the Host header only when it differs
        # from the scheme's standard one.
        host_header = host

        if scheme == 'https':
            if port != '443':
                host_header += ':' + port
        else:
            if port != '80':
                host_header += ':' + port

        env['HTTP_HOST'] = host_header

    # Seeking to the end reports the body size; rewind afterwards so the
    # stream is handed out positioned at the start.
    content_length = body.seek(0, 2)
    body.seek(0)

    if content_length != 0:
        # CGI-style environ values must be strings under PEP 3333, so
        # the length is stored as text rather than as an int.
        env['CONTENT_LENGTH'] = str(content_length)

    if headers is not None:
        _add_headers_to_environ(env, headers)

    return env
def test_uri_decode_replace_bad_unicode(self, encoded, expected):
    """Check that ``uri.decode(encoded)`` yields ``expected``."""
    decoded = uri.decode(encoded)
    assert decoded == expected
def create_environ(path='/', query_string='', protocol='HTTP/1.1',
                   scheme='http', host=DEFAULT_HOST, port=None,
                   headers=None, app='', body='', method='GET',
                   wsgierrors=None, file_wrapper=None):
    """Creates a mock PEP-3333 environ dict for simulating WSGI requests.

    Args:
        path (str, optional): The path for the request (default '/')
        query_string (str, optional): The query string to simulate, without a
            leading '?' (default '')
        protocol (str, optional): The HTTP protocol to simulate
            (default 'HTTP/1.1'). If set to 'HTTP/1.0', the Host header
            will not be added to the environment.
        scheme (str): URL scheme, either 'http' or 'https' (default 'http')
        host(str): Hostname for the request (default 'falconframework.org')
        port (str or int, optional): The TCP port to simulate. Defaults to
            the standard port used by the given scheme (i.e., 80 for 'http'
            and 443 for 'https').
        headers (dict or list, optional): Headers as a dict or an
            iterable collection of ``(key, value)`` tuples
        app (str): Value for the SCRIPT_NAME environ variable, described in
            PEP-333: 'The initial portion of the request URL's "path" that
            corresponds to the application object, so that the application
            knows its virtual "location". This may be an empty string, if the
            application corresponds to the "root" of the server.' (default '')
        body (str or unicode): The body of the request (default ''). Unicode
            text is encoded as UTF-8.
        method (str): The HTTP method to use (default 'GET')
        wsgierrors (io): The stream to use as wsgierrors (default sys.stderr)
        file_wrapper: Callable that returns an iterable, to be used as
            the value for 'wsgi.file_wrapper' in the environ.

    Returns:
        dict: The populated environ. CONTENT_LENGTH is included, as a
        string, only when the body is non-empty.
    """
    body = io.BytesIO(
        body.encode('utf-8') if isinstance(body, six.text_type) else body)

    # NOTE(kgriffs): wsgiref, gunicorn, and uWSGI all unescape
    # the paths before setting PATH_INFO
    path = uri.decode(path)

    # NOTE(kgriffs): nocover since this branch will never be
    # taken in Python3. However, the branch is tested under Py2,
    # in test_utils.TestFalconTesting.test_unicode_path_in_create_environ
    if six.PY2 and isinstance(path, six.text_type):  # pragma: nocover
        path = path.encode('utf-8')

    scheme = scheme.lower()
    if port is None:
        port = '80' if scheme == 'http' else '443'
    else:
        port = str(port)

    env = {
        'SERVER_PROTOCOL': protocol,
        'SERVER_SOFTWARE': 'gunicorn/0.17.0',
        'SCRIPT_NAME': app,
        'REQUEST_METHOD': method,
        'PATH_INFO': path,
        'QUERY_STRING': query_string,
        'HTTP_USER_AGENT': 'curl/7.24.0 (x86_64-apple-darwin12.0)',
        'REMOTE_PORT': '65133',
        'RAW_URI': '/',
        'REMOTE_ADDR': '127.0.0.1',
        'SERVER_NAME': host,
        'SERVER_PORT': port,

        # PEP 3333 lists wsgi.version among the variables an environ
        # must contain.
        'wsgi.version': (1, 0),
        'wsgi.url_scheme': scheme,
        'wsgi.input': body,
        'wsgi.errors': wsgierrors or sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': True,
        'wsgi.run_once': False
    }

    if file_wrapper is not None:
        env['wsgi.file_wrapper'] = file_wrapper

    if protocol != 'HTTP/1.0':
        # The port is appended to the Host header only when it differs
        # from the scheme's standard one.
        host_header = host

        if scheme == 'https':
            if port != '443':
                host_header += ':' + port
        else:
            if port != '80':
                host_header += ':' + port

        env['HTTP_HOST'] = host_header

    # Seeking to the end reports the body size; rewind afterwards so the
    # stream is handed out positioned at the start.
    content_length = body.seek(0, 2)
    body.seek(0)

    if content_length != 0:
        # CGI-style environ values must be strings under PEP 3333, so
        # the length is stored as text rather than as an int.
        env['CONTENT_LENGTH'] = str(content_length)

    if headers is not None:
        _add_headers_to_environ(env, headers)

    return env
def create_environ(path='/', query_string='', http_version='1.1',
                   scheme='http', host=DEFAULT_HOST, port=None,
                   headers=None, app=None, body='', method='GET',
                   wsgierrors=None, file_wrapper=None, remote_addr=None,
                   root_path=None) -> Dict[str, Any]:
    """Build a mock PEP-3333 environ ``dict`` for simulating WSGI requests.

    Keyword Args:
        path (str): The path for the request (default '/')
        query_string (str): The query string to simulate, without a
            leading '?' (default ''). The query string is passed as-is
            (it will not be percent-encoded).
        http_version (str): The HTTP version to simulate. Must be either
            '2', '2.0', '1.1', '1.0', or '1' (default '1.1'). If set to
            '1.0', the Host header will not be added to the environ.
        scheme (str): URL scheme, either 'http' or 'https' (default 'http')
        host(str): Hostname for the request (default 'falconframework.org')
        port (int): The TCP port to simulate. Defaults to
            the standard port used by the given scheme (i.e., 80 for 'http'
            and 443 for 'https'). A string may also be passed, as long as
            it can be parsed as an int.
        headers (dict): Headers as a dict-like (Mapping) object, or an
            iterable yielding a series of two-member (*name*, *value*)
            iterables. Each pair of strings provides the name and value
            for an HTTP header. If desired, multiple header values may be
            combined into a single (*name*, *value*) pair by joining the
            values with a comma when the header in question supports the
            list format (see also RFC 7230 and RFC 7231). Header names are
            not case-sensitive.
        root_path (str): Value for the ``SCRIPT_NAME`` environ variable,
            described in PEP-333: 'The initial portion of the request URL's
            "path" that corresponds to the application object, so that the
            application knows its virtual "location". This may be an empty
            string, if the application corresponds to the "root" of the
            server.' (default '')
        app (str): Deprecated alias for `root_path`. If both kwargs are
            passed, `root_path` takes precedence.
        body (str): The body of the request (default ''). The value will be
            encoded as UTF-8 in the WSGI environ. Alternatively, a byte
            string may be passed, in which case it will be used as-is.
        method (str): The HTTP method to use (default 'GET')
        wsgierrors (io): The stream to use as *wsgierrors*
            (default ``sys.stderr``)
        file_wrapper: Callable that returns an iterable, to be used as
            the value for *wsgi.file_wrapper* in the environ.
        remote_addr (str): Remote address for the request to use as the
            'REMOTE_ADDR' environ variable (default None)

    Returns:
        dict: The populated environ.

    Raises:
        ValueError: If ``query_string`` starts with '?'.
    """
    http_version = _fixup_http_version(http_version)

    if query_string and query_string.startswith('?'):
        raise ValueError("query_string should not start with '?'")

    if isinstance(body, str):
        body = body.encode('utf-8')
    body_stream = io.BytesIO(body)

    # NOTE(kgriffs): wsgiref, gunicorn, and uWSGI all unescape
    # the paths before setting PATH_INFO
    path = uri.decode(path, unquote_plus=False)

    # NOTE(kgriffs): The decoded path may contain UTF-8 characters.
    # But according to the WSGI spec, no strings can contain chars
    # outside ISO-8859-1. Therefore, to reconcile the URI
    # encoding standard that allows UTF-8 with the WSGI spec
    # that does not, WSGI servers tunnel the string via
    # ISO-8859-1. falcon.testing.create_environ() mimics this
    # behavior, e.g.:
    #
    #   tunnelled_path = path.encode('utf-8').decode('iso-8859-1')
    #
    # falcon.Request does the following to reverse the process:
    #
    #   path = tunnelled_path.encode('iso-8859-1').decode('utf-8', 'replace')
    #
    path = path.encode('utf-8').decode('iso-8859-1')

    scheme = scheme.lower()
    if port is not None:
        # NOTE(kgriffs): Running it through int() first ensures that if
        # a string was passed, it is a valid integer.
        port = str(int(port))
    elif scheme == 'http':
        port = '80'
    else:
        port = '443'

    # root_path wins over the deprecated app alias; '' when neither is set.
    script_name = root_path or app or ''

    # NOTE(kgriffs): Judging by the algorithm given in PEP-3333 for
    # reconstructing the URL, SCRIPT_NAME is expected to contain a
    # preceding slash character.
    if script_name and not script_name.startswith('/'):
        script_name = '/' + script_name

    env = {
        'SERVER_PROTOCOL': 'HTTP/' + http_version,
        'SERVER_SOFTWARE': 'gunicorn/0.17.0',
        'SCRIPT_NAME': script_name,
        'REQUEST_METHOD': method,
        'PATH_INFO': path,
        'QUERY_STRING': query_string,
        'REMOTE_PORT': '65133',
        'RAW_URI': '/',
        'SERVER_NAME': host,
        'SERVER_PORT': port,

        'wsgi.version': (1, 0),
        'wsgi.url_scheme': scheme,
        'wsgi.input': body_stream,
        'wsgi.errors': wsgierrors or sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': True,
        'wsgi.run_once': False
    }

    # NOTE(kgriffs): It has been observed that WSGI servers do not always
    # set the REMOTE_ADDR variable, so we don't always set it either, to
    # ensure the framework/app handles that case correctly.
    if remote_addr:
        env['REMOTE_ADDR'] = remote_addr

    if file_wrapper is not None:
        env['wsgi.file_wrapper'] = file_wrapper

    if http_version != '1.0':
        # The port is appended to the Host header only when it differs
        # from the scheme's standard one; anything other than 'https'
        # is compared against port 80.
        standard_port = '443' if scheme == 'https' else '80'
        if port == standard_port:
            env['HTTP_HOST'] = host
        else:
            env['HTTP_HOST'] = host + ':' + port

    # Seeking to the end reports the body size; rewind afterwards so the
    # stream is handed out positioned at the start.
    content_length = body_stream.seek(0, 2)
    body_stream.seek(0)

    if content_length != 0:
        env['CONTENT_LENGTH'] = str(content_length)

    if headers is not None:
        _add_headers_to_environ(env, headers)

    return env
def create_scope(path='/', query_string='', method='GET', headers=None,
                 host=DEFAULT_HOST, scheme=None, port=None, http_version='1.1',
                 remote_addr=None, root_path=None, content_length=None,
                 include_server=True) -> Dict[str, Any]:
    """Build a mock ASGI scope ``dict`` for simulating ASGI requests.

    Keyword Args:
        path (str): The path for the request (default '/')
        query_string (str): The query string to simulate, without a
            leading '?' (default ''). The query string is passed as-is
            (it will not be percent-encoded).
        method (str): The HTTP method to use (default 'GET')
        headers (dict): Headers as a dict-like (Mapping) object, or an
            iterable yielding a series of two-member (*name*, *value*)
            iterables. Each pair of strings provides the name and value
            for an HTTP header. If desired, multiple header values may be
            combined into a single (*name*, *value*) pair by joining the
            values with a comma when the header in question supports the
            list format (see also RFC 7230 and RFC 7231). When the
            request will include a body, the Content-Length header should
            be included in this list. Header names are not case-sensitive.
        host(str): Hostname for the request (default 'falconframework.org').
            This also determines the value of the Host header in the
            request.
        scheme (str): URL scheme, either 'http' or 'https' (default 'http')
        port (int): The TCP port to simulate. Defaults to
            the standard port used by the given scheme (i.e., 80 for 'http'
            and 443 for 'https'). A string may also be passed, as long as
            it can be parsed as an int.
        http_version (str): The HTTP version to simulate. Must be either
            '2', '2.0', '1.1', '1.0', or '1' (default '1.1'). If set to
            '1.0', the Host header will not be added to the scope.
        remote_addr (str): Remote address for the request to use for
            the 'client' field in the connection scope (default None)
        root_path (str): The root path this application is mounted at; same
            as SCRIPT_NAME in WSGI (default '').
        content_length (int): The expected content length of the request
            body (default ``None``). If specified, this value will be
            used to set the Content-Length header in the request.
        include_server (bool): Set to ``False`` to not set the 'server' key
            in the scope ``dict`` (default ``True``).

    Returns:
        dict: The populated connection scope.

    Raises:
        ValueError: If ``query_string`` starts with '?', or if ``scheme``
            is given but is neither 'http' nor 'https'.
    """
    http_version = _fixup_http_version(http_version)

    path = uri.decode(path, unquote_plus=False)

    # NOTE(kgriffs): Handles both None and ''
    raw_query = query_string.encode() if query_string else b''

    if raw_query.startswith(b'?'):
        raise ValueError("query_string should not start with '?'")

    scope = {
        'type': 'http',
        'asgi': {
            'version': '3.0',
            'spec_version': '2.1',
        },
        'http_version': http_version,
        'method': method.upper(),
        'path': path,
        'query_string': raw_query,
    }

    # NOTE(kgriffs): Explicitly test against None so that the caller
    # is able to simulate setting app to an empty string if they
    # need to cover that branch in their code.
    if root_path is not None:
        # NOTE(kgriffs): Judging by the algorithm given in PEP-3333 for
        # reconstructing the URL, SCRIPT_NAME is expected to contain a
        # preceding slash character. Since ASGI states that this value is
        # the same as WSGI's SCRIPT_NAME, we will follow suit here.
        lacks_slash = bool(root_path) and not root_path.startswith('/')
        scope['root_path'] = '/' + root_path if lacks_slash else root_path

    # The 'scheme' key is only present when the caller supplied one.
    if scheme:
        if scheme not in ('http', 'https'):
            raise ValueError("scheme must be either 'http' or 'https'")

        scope['scheme'] = scheme

    if port is not None:
        port = int(port)
    elif (scheme or 'http') == 'http':
        port = 80
    else:
        port = 443

    if remote_addr:
        # NOTE(kgriffs): Choose from the standard IANA dynamic range
        remote_port = random.randint(49152, 65535)

        # NOTE(kgriffs): Expose as an iterable to ensure the framework/app
        # isn't hard-coded to only work with a list or tuple.
        scope['client'] = iter([remote_addr, remote_port])

    if include_server:
        scope['server'] = iter([host, port])

    _add_headers_to_scope(
        scope, headers, content_length, host, port, scheme, http_version)

    return scope
def __init__(self, env, options=None):
    """Initialize request attributes from a WSGI environ ``dict``.

    Args:
        env (dict): WSGI environ for the request. ``wsgi.errors``,
            ``wsgi.input``, ``REQUEST_METHOD`` and ``PATH_INFO`` are
            read unconditionally; ``QUERY_STRING`` and ``CONTENT_TYPE``
            are optional.
        options: Request options to use. A new ``RequestOptions()`` is
            created when this is omitted or falsy.
    """
    global _maybe_wrap_wsgi_stream

    self.env = env
    self.options = options or RequestOptions()

    context_factory = self.context_type
    if context_factory is None:
        # Literal syntax is more efficient than using dict()
        self.context = {}
    else:
        # pylint will detect this as not-callable because it only sees the
        # declaration of None, not whatever type a subclass may have set.
        self.context = context_factory()  # pylint: disable=not-callable

    self._wsgierrors = env['wsgi.errors']
    self.stream = env['wsgi.input']
    self.method = env['REQUEST_METHOD']

    # Normalize path: an empty path becomes '/', and a trailing slash is
    # dropped from anything longer than a single character.
    raw_path = env['PATH_INFO']
    if not raw_path:
        self.path = '/'
    elif len(raw_path) != 1 and raw_path.endswith('/'):
        self.path = raw_path[:-1]
    else:
        self.path = raw_path

    self._params = {}

    # PERF(kgriffs): if...in is faster than using env.get(...)
    query_str = env['QUERY_STRING'] if 'QUERY_STRING' in env else None
    if query_str:
        self.query_string = uri.decode(query_str)
        self._params = uri.parse_query_string(
            self.query_string,
            keep_blank_qs_values=self.options.keep_blank_qs_values,
        )
    else:
        # Missing and empty query strings are treated alike.
        self.query_string = six.text_type()

    self._cached_headers = None
    self._cached_uri = None
    self._cached_relative_uri = None

    try:
        self.content_type = self.env['CONTENT_TYPE']
    except KeyError:
        self.content_type = None

    # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
    # normalizing semantics between, e.g., gunicorn and wsgiref.
    if _maybe_wrap_wsgi_stream:
        if isinstance(self.stream, NativeStream):
            # NOTE(kgriffs): This is covered by tests, it's just that
            # coverage can't figure this out for some reason (TBD).
            self._wrap_stream()  # pragma nocover
        else:
            # PERF(kgriffs): If self.stream does not need to be wrapped
            # this time, it never needs to be wrapped since the server
            # will continue using the same type for wsgi.input.
            _maybe_wrap_wsgi_stream = False

    # PERF(kgriffs): Technically, we should spend a few more
    # cycles and parse the content type for real, but
    # this heuristic will work virtually all the time.
    media_type = self.content_type
    if (media_type is not None and
            'application/x-www-form-urlencoded' in media_type):
        self._parse_form_urlencoded()
def __init__(self, env):
    """Initialize request attributes from a WSGI environ ``dict``.

    Args:
        env (dict): WSGI environ for the request. ``wsgi.errors``,
            ``wsgi.input``, ``REQUEST_METHOD`` and ``PATH_INFO`` are
            read unconditionally; ``QUERY_STRING`` is optional.
    """
    self.env = env

    context_factory = self.context_type
    if context_factory is None:
        # Literal syntax is more efficient than using dict()
        self.context = {}
    else:
        # pylint will detect this as not-callable because it only sees the
        # declaration of None, not whatever type a subclass may have set.
        self.context = context_factory()  # pylint: disable=not-callable

    self._wsgierrors = env['wsgi.errors']
    self.stream = env['wsgi.input']
    self.method = env['REQUEST_METHOD']

    # Normalize path: an empty path becomes '/', and a trailing slash is
    # dropped from anything longer than a single character.
    raw_path = env['PATH_INFO']
    if not raw_path:
        self.path = '/'
    elif len(raw_path) != 1 and raw_path.endswith('/'):
        self.path = raw_path[:-1]
    else:
        self.path = raw_path

    # QUERY_STRING isn't required to be in env, so let's check
    # PERF: if...in is faster than using env.get(...)
    raw_query = env['QUERY_STRING'] if 'QUERY_STRING' in env else None
    if raw_query:
        # TODO(kgriffs): Should this escape individual values instead
        # of the entire string? The way it is now, this:
        #
        #   x=ab%2Bcd%3D42%2C9
        #
        # becomes this:
        #
        #   x=ab+cd=42,9
        #
        self.query_string = uri.decode(raw_query)
    else:
        self.query_string = six.text_type()

    # PERF: Don't parse it if we don't have to!
    self._params = (uri.parse_query_string(self.query_string)
                    if self.query_string else {})

    helpers.normalize_headers(env)
    self._cached_headers = {}

    self._cached_uri = None
    self._cached_relative_uri = None

    self.content_type = self._get_header_by_wsgi_name('HTTP_CONTENT_TYPE')

    # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
    # normalizing semantics between, e.g., gunicorn and wsgiref.
    if isinstance(self.stream, NativeStream):  # pragma: nocover
        # NOTE(kgriffs): coverage can't detect that this *is* actually
        # covered since the test that does so uses multiprocessing.
        self.stream = helpers.Body(self.stream, self.content_length)

    # Called unconditionally; any content-type check is left to the
    # method itself.
    self._parse_form_urlencoded()