Beispiel #1
0
    def _parse_form_urlencoded(self):
        # PERF(kgriffs): Technically, we should spend a few more
        # cycles and parse the content type for real, but
        # this heuristic will work virtually all the time.
        if (self.content_type and
                'application/x-www-form-urlencoded' in self.content_type):

            # NOTE(kgriffs): This assumes self.stream has been patched
            # above in the case of wsgiref, so that self.content_length
            # is not needed. Normally we just avoid accessing
            # self.content_length, because it is a little expensive
            # to call. We could cache self.content_length, but the
            # overhead to do that won't usually be helpful, since
            # content length will only ever be read once per
            # request in most cases.
            body = self.stream.read()

            # NOTE(kgriffs): According to http://goo.gl/6rlcux the
            # body should be US-ASCII. Enforcing this also helps
            # catch malicious input.
            try:
                body = body.decode('ascii')
            except UnicodeDecodeError:
                body = None
                self.log_error('Non-ASCII characters found in form body '
                               'with Content-Type of '
                               'application/x-www-form-urlencoded. Body '
                               'will be ignored.')

            if body:
                extra_params = uri.parse_query_string(uri.decode(body))
                self._params.update(extra_params)
Beispiel #2
0
    def _parse_form_urlencoded(self):
        # NOTE(kgriffs): This assumes self.stream has been patched
        # above in the case of wsgiref, so that self.content_length
        # is not needed. Normally we just avoid accessing
        # self.content_length, because it is a little expensive
        # to call. We could cache self.content_length, but the
        # overhead to do that won't usually be helpful, since
        # content length will only ever be read once per
        # request in most cases.
        body = self.stream.read()

        # NOTE(kgriffs): According to http://goo.gl/6rlcux the
        # body should be US-ASCII. Enforcing this also helps
        # catch malicious input.
        try:
            body = body.decode('ascii')
        except UnicodeDecodeError:
            body = None
            self.log_error('Non-ASCII characters found in form body '
                           'with Content-Type of '
                           'application/x-www-form-urlencoded. Body '
                           'will be ignored.')

        if body:
            extra_params = uri.parse_query_string(
                uri.decode(body),
                keep_blank_qs_values=self.options.keep_blank_qs_values,
            )

            self._params.update(extra_params)
Beispiel #3
0
    def test_prop_uri_decode_models_stdlib_unquote_plus(self):
        """Check ``uri.decode`` against the stdlib ``unquote_plus``.

        Every entry of ``self.uris`` is first escaped with
        ``uri.encode_value`` and then decoded by both implementations.
        """
        reference_decode = six.moves.urllib.parse.unquote_plus
        for raw in self.uris:
            encoded = uri.encode_value(raw)
            self.assertEqual(reference_decode(encoded), uri.decode(encoded))
Beispiel #4
0
    def test_prop_uri_decode_models_stdlib_unquote_plus(self):
        """Check ``uri.decode`` against the stdlib ``unquote_plus``.

        Every entry of ``self.uris`` is first escaped with
        ``uri.encode_value`` and then decoded by both implementations.
        """
        for raw in self.uris:
            encoded = uri.encode_value(raw)
            assert unquote_plus(encoded) == uri.decode(encoded)
Beispiel #5
0
    def test_uri_encode_value(self):
        """Check ``uri.encode_value`` on plain, reserved and non-ASCII input."""
        # Unreserved characters pass through untouched.
        assert uri.encode_value('abcd') == 'abcd'

        # Spaces, non-ASCII characters (as UTF-8) and reserved
        # delimiters are all percent-encoded.
        assert uri.encode_value('ab cd') == 'ab%20cd'
        assert uri.encode_value('\u00e7') == '%C3%A7'
        assert uri.encode_value('\u00e7\u20ac') == '%C3%A7%E2%82%AC'
        assert uri.encode_value('ab/cd') == 'ab%2Fcd'
        assert uri.encode_value('ab+cd=42,9') == 'ab%2Bcd%3D42%2C9'

        # NOTE(minesja): Addresses #1872 -- an input that already looks
        # percent-encoded must be escaped again so that it round-trips.
        assert uri.encode_value('%26') == '%2526'
        assert uri.decode(uri.encode_value('%26')) == '%26'
Beispiel #6
0
    def test_uri_decode(self):
        """Check ``uri.decode`` on plain, percent-encoded and UTF-8 input."""
        cases = [
            ("abcd", "abcd"),
            (u"abcd", u"abcd"),
            (u"ab%20cd", u"ab cd"),
            ("This thing is %C3%A7", u"This thing is \u00e7"),
            ("This thing is %C3%A7%E2%82%AC", u"This thing is \u00e7\u20ac"),
            ("ab%2Fcd", "ab/cd"),
            ("http://example.com?x=ab%2Bcd%3D42%2C9",
             "http://example.com?x=ab+cd=42,9"),
        ]

        for encoded, expected in cases:
            self.assertEqual(uri.decode(encoded), expected)
Beispiel #7
0
    def test_uri_decode(self):
        """Check ``uri.decode`` on plain, percent-encoded and UTF-8 input."""
        cases = (
            ('abcd', 'abcd'),
            (u'abcd', u'abcd'),
            (u'ab%20cd', u'ab cd'),
            ('%C3%A7', u'\u00e7'),
            ('ab%2Fcd', 'ab/cd'),
            ('http://example.com?x=ab%2Bcd%3D42%2C9',
             'http://example.com?x=ab+cd=42,9'),
        )

        for encoded, expected in cases:
            self.assertEqual(uri.decode(encoded), expected)
Beispiel #8
0
    def test_uri_encode(self):
        """Check ``uri.encode`` on whole URLs and on a pre-escaped input."""
        cases = (
            # Nothing to escape: the URL comes back unchanged.
            ('http://example.com/v1/fizbit/messages?limit=3&echo=true',
             'http://example.com/v1/fizbit/messages?limit=3&echo=true'),
            # A space in the path is percent-encoded.
            ('http://example.com/v1/fiz bit/messages',
             'http://example.com/v1/fiz%20bit/messages'),
            # Non-ASCII characters are encoded as UTF-8 octets.
            ('http://example.com/v1/fizbit/messages?limit=3&e\u00e7ho=true',
             'http://example.com/v1/fizbit/messages?limit=3&e%C3%A7ho=true'),
        )
        for url, expected in cases:
            assert uri.encode(url) == expected

        # NOTE(minesja): Addresses #1872
        escaped = uri.encode('%26')
        assert escaped == '%2526'
        assert uri.decode(uri.encode('%26')) == '%26'
Beispiel #9
0
    def test_uri_decode(self):
        """Check ``uri.decode`` on plain, percent-encoded and UTF-8 input."""
        cases = [
            ('abcd', 'abcd'),
            (u'abcd', u'abcd'),
            (u'ab%20cd', u'ab cd'),
            ('This thing is %C3%A7', u'This thing is \u00e7'),
            ('This thing is %C3%A7%E2%82%AC', u'This thing is \u00e7\u20ac'),
            ('ab%2Fcd', 'ab/cd'),
            ('http://example.com?x=ab%2Bcd%3D42%2C9',
             'http://example.com?x=ab+cd=42,9'),
        ]

        for encoded, expected in cases:
            assert uri.decode(encoded) == expected
Beispiel #10
0
    def test_uri_decode(self):
        """Check ``uri.decode`` on plain, percent-encoded and UTF-8 input."""
        expectations = (
            ('abcd', 'abcd'),
            (u'abcd', u'abcd'),
            (u'ab%20cd', u'ab cd'),
            ('This thing is %C3%A7', u'This thing is \u00e7'),
            ('This thing is %C3%A7%E2%82%AC', u'This thing is \u00e7\u20ac'),
            ('ab%2Fcd', 'ab/cd'),
            ('http://example.com?x=ab%2Bcd%3D42%2C9',
             'http://example.com?x=ab+cd=42,9'),
        )

        for encoded, decoded in expectations:
            assert uri.decode(encoded) == decoded
Beispiel #11
0
    def test_uri_decode(self):
        """Check ``uri.decode`` on plain, percent-encoded and UTF-8 input."""
        cases = [
            ('abcd', 'abcd'),
            (u'abcd', u'abcd'),
            (u'ab%20cd', u'ab cd'),
            ('This thing is %C3%A7', u'This thing is \u00e7'),
            ('This thing is %C3%A7%E2%82%AC', u'This thing is \u00e7\u20ac'),
            ('ab%2Fcd', 'ab/cd'),
            ('http://example.com?x=ab%2Bcd%3D42%2C9',
             'http://example.com?x=ab+cd=42,9'),
        ]

        for encoded, expected in cases:
            self.assertEqual(uri.decode(encoded), expected)
Beispiel #12
0
    def test_uri_decode(self):
        """Check ``uri.decode`` on plain, percent-encoded and UTF-8 input."""
        expectations = (
            ('abcd', 'abcd'),
            (u'abcd', u'abcd'),
            (u'ab%20cd', u'ab cd'),
            ('This thing is %C3%A7', u'This thing is \u00e7'),
            ('This thing is %C3%A7%E2%82%AC', u'This thing is \u00e7\u20ac'),
            ('ab%2Fcd', 'ab/cd'),
            ('http://example.com?x=ab%2Bcd%3D42%2C9',
             'http://example.com?x=ab+cd=42,9'),
        )

        for encoded, decoded in expectations:
            self.assertEqual(uri.decode(encoded), decoded)
Beispiel #13
0
    def test_uri_decode_unquote_plus(self):
        """Check how ``unquote_plus`` affects literal and escaped '+'.

        A literal '+' becomes a space unless ``unquote_plus=False``; an
        escaped '%2B' always decodes to '+'.
        """
        path = '/disk/lost+found/fd0'
        url = 'http://example.com?x=ab%2Bcd%3D42%2C9'
        decoded_url = 'http://example.com?x=ab+cd=42,9'

        cases = (
            (path, {}, '/disk/lost found/fd0'),
            (path, {'unquote_plus': True}, '/disk/lost found/fd0'),
            (path, {'unquote_plus': False}, '/disk/lost+found/fd0'),
            (url, {}, decoded_url),
            (url, {'unquote_plus': True}, decoded_url),
            (url, {'unquote_plus': False}, decoded_url),
        )

        for encoded, kwargs, expected in cases:
            assert uri.decode(encoded, **kwargs) == expected
Beispiel #14
0
    def test_uri_decode_unquote_plus(self, decode_approach):
        """Check how ``unquote_plus`` affects literal and escaped '+'.

        A literal '+' becomes a space unless ``unquote_plus=False``; an
        escaped '%2B' always decodes to '+'. ``decode_approach`` is
        requested only as a fixture; its value is not used here.
        """
        path = '/disk/lost+found/fd0'
        url = 'http://example.com?x=ab%2Bcd%3D42%2C9'
        decoded_url = 'http://example.com?x=ab+cd=42,9'

        cases = (
            (path, {}, '/disk/lost found/fd0'),
            (path, {'unquote_plus': True}, '/disk/lost found/fd0'),
            (path, {'unquote_plus': False}, '/disk/lost+found/fd0'),
            (url, {}, decoded_url),
            (url, {'unquote_plus': True}, decoded_url),
            (url, {'unquote_plus': False}, decoded_url),
        )

        for encoded, kwargs, expected in cases:
            assert uri.decode(encoded, **kwargs) == expected
Beispiel #15
0
def create_environ(path='/', query_string='', protocol='HTTP/1.1',
                   scheme='http', host=DEFAULT_HOST, port=None,
                   headers=None, app='', body='', method='GET',
                   wsgierrors=None, file_wrapper=None):

    """Build a mock PEP-3333 environ ``dict`` for simulating a WSGI request.

    Keyword Args:
        path (str): Request path (default '/'). It is percent-decoded
            before being stored as ``PATH_INFO``.
        query_string (str): Query string to simulate, without a
            leading '?' (default '')
        protocol (str): HTTP protocol to simulate (default 'HTTP/1.1').
            With 'HTTP/1.0' no Host header is added to the environ.
        scheme (str): URL scheme, either 'http' or 'https'
            (default 'http')
        host (str): Hostname for the request (default ``DEFAULT_HOST``)
        port (str): TCP port to simulate. Defaults to the standard port
            of the given scheme (i.e., 80 for 'http' and 443 for
            'https').
        headers (dict): Headers as a ``dict`` or an iterable yielding
            (*key*, *value*) ``tuple``'s
        app (str): Value for the ``SCRIPT_NAME`` environ variable,
            described in PEP-333: 'The initial portion of the request
            URL's "path" that corresponds to the application object, so
            that the application knows its virtual "location". This may
            be an empty string, if the application corresponds to the
            "root" of the server.' (default '')
        body (str): Request body (default ''). Both byte strings and
            Unicode strings are accepted; Unicode strings are encoded
            as UTF-8.
        method (str): HTTP method to use (default 'GET')
        wsgierrors (io): Stream to use as *wsgierrors*
            (default ``sys.stderr``)
        file_wrapper: Callable that returns an iterable, to be used as
            the value for *wsgi.file_wrapper* in the environ.

    Returns:
        dict: The simulated WSGI environ.

    Raises:
        ValueError: ``query_string`` starts with '?'.

    """

    if query_string and query_string.startswith('?'):
        raise ValueError("query_string should not start with '?'")

    payload = body.encode('utf-8') if isinstance(body, six.text_type) else body
    stream = io.BytesIO(payload)

    # NOTE(kgriffs): wsgiref, gunicorn, and uWSGI all unescape
    # the paths before setting PATH_INFO
    path_info = uri.decode(path)

    if six.PY3:
        # NOTE(kgriffs): The decoded path may contain UTF-8 characters,
        # yet the WSGI spec forbids characters outside ISO-8859-1 in
        # strings. WSGI servers reconcile the two by tunnelling the
        # string via ISO-8859-1, and create_environ() mimics that:
        #
        #   tunnelled_path = path.encode('utf-8').decode('iso-8859-1')
        #
        # falcon.Request reverses the process with:
        #
        #   path = tunnelled_path.encode('iso-8859-1').decode('utf-8', 'replace')
        #
        path_info = path_info.encode('utf-8').decode('iso-8859-1')

    if six.PY2 and isinstance(path_info, six.text_type):
        path_info = path_info.encode('utf-8')

    scheme = scheme.lower()
    port = ('80' if scheme == 'http' else '443') if port is None else str(port)

    # CGI-style request variables.
    env = {
        'SERVER_PROTOCOL': protocol,
        'SERVER_SOFTWARE': 'gunicorn/0.17.0',
        'SCRIPT_NAME': app,
        'REQUEST_METHOD': method,
        'PATH_INFO': path_info,
        'QUERY_STRING': query_string,
        'HTTP_USER_AGENT': 'curl/7.24.0 (x86_64-apple-darwin12.0)',
        'REMOTE_PORT': '65133',
        'RAW_URI': '/',
        'REMOTE_ADDR': '127.0.0.1',
        'SERVER_NAME': host,
        'SERVER_PORT': port,
    }

    # WSGI-specific variables.
    env.update({
        'wsgi.version': (1, 0),
        'wsgi.url_scheme': scheme,
        'wsgi.input': stream,
        'wsgi.errors': wsgierrors or sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': True,
        'wsgi.run_once': False
    })

    if file_wrapper is not None:
        env['wsgi.file_wrapper'] = file_wrapper

    if protocol != 'HTTP/1.0':
        # The port is only spelled out when it is not the scheme's
        # standard one.
        standard_port = '443' if scheme == 'https' else '80'
        env['HTTP_HOST'] = (host if port == standard_port
                            else host + ':' + port)

    # Seeking to the end (whence=2) reports the body size; rewind so
    # the application reads from the start.
    body_size = stream.seek(0, 2)
    stream.seek(0)

    if body_size != 0:
        env['CONTENT_LENGTH'] = str(body_size)

    if headers is not None:
        _add_headers_to_environ(env, headers)

    return env
Beispiel #16
0
    def __init__(self, env):
        """Populate request attributes from a WSGI environment dict.

        Note: Responders are not meant to instantiate Request directly.

        Args:
            env: The WSGI environment dict passed in from the server.
                See also the PEP-3333 spec.

        """
        self.env = env

        self._wsgierrors = env['wsgi.errors']
        self.stream = env['wsgi.input']
        self.method = env['REQUEST_METHOD']

        # Normalize path: an empty path becomes '/', and a trailing
        # slash is stripped from anything longer than one character.
        raw_path = env['PATH_INFO']
        if not raw_path:
            self.path = '/'
        elif len(raw_path) != 1 and raw_path.endswith('/'):
            self.path = raw_path[:-1]
        else:
            self.path = raw_path

        # QUERY_STRING isn't required to be in env, so let's check
        # PERF: if...in is faster than using env.get(...)
        raw_query = env['QUERY_STRING'] if 'QUERY_STRING' in env else None
        if raw_query:
            # TODO(kgriffs): Should this escape individual values instead
            # of the entire string? The way it is now, this:
            #
            #   x=ab%2Bcd%3D42%2C9
            #
            # becomes this:
            #
            #   x=ab+cd=42,9
            #
            self.query_string = uri.decode(raw_query)
        else:
            self.query_string = six.text_type()

        # PERF: Don't parse it if we don't have to!
        self._params = (uri.parse_query_string(self.query_string)
                        if self.query_string else {})

        helpers.normalize_headers(env)
        self._cached_headers = {}

        self._cached_uri = None
        self._cached_relative_uri = None

        # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
        # normalizing semantics between, e.g., gunicorn and wsgiref.
        if isinstance(self.stream, NativeStream):  # pragma: nocover
            # NOTE(kgriffs): coverage can't detect that this *is* actually
            # covered since the test that does so uses multiprocessing.
            self.stream = helpers.Body(self.stream, self.content_length)
Beispiel #17
0
 def test_uri_decode_bad_unicode(self, encoded, expected, decode_approach):
     """Check that ``uri.decode(encoded)`` yields ``expected``.

     All three arguments are supplied from outside this test;
     ``decode_approach`` is not used in the body.
     """
     decoded = uri.decode(encoded)
     assert decoded == expected
Beispiel #18
0
    def __init__(self, env):
        """Populate request attributes from a WSGI environment dict.

        When the content type mentions
        ``application/x-www-form-urlencoded``, the request body is read
        and its parameters are merged into the query parameters. A form
        body containing non-ASCII bytes is logged and ignored rather
        than raising ``UnicodeDecodeError`` from the constructor.

        Args:
            env: The WSGI environment dict passed in from the server.
                See also the PEP-3333 spec.

        """
        self.env = env

        if self.context_type is None:
            # Literal syntax is more efficient than using dict()
            self.context = {}
        else:
            # pylint will detect this as not-callable because it only sees the
            # declaration of None, not whatever type a subclass may have set.
            self.context = self.context_type()  # pylint: disable=not-callable

        self._wsgierrors = env['wsgi.errors']
        self.stream = env['wsgi.input']
        self.method = env['REQUEST_METHOD']

        # Normalize path: an empty path becomes '/', and a trailing
        # slash is stripped from anything longer than one character.
        path = env['PATH_INFO']
        if path:
            if len(path) != 1 and path.endswith('/'):
                self.path = path[:-1]
            else:
                self.path = path
        else:
            self.path = '/'

        # QUERY_STRING isn't required to be in env, so let's check
        # PERF: if...in is faster than using env.get(...)
        if 'QUERY_STRING' in env and env['QUERY_STRING']:

            # TODO(kgriffs): Should this escape individual values instead
            # of the entire string? The way it is now, this:
            #
            #   x=ab%2Bcd%3D42%2C9
            #
            # becomes this:
            #
            #   x=ab+cd=42,9
            #
            self.query_string = uri.decode(env['QUERY_STRING'])

        else:
            self.query_string = six.text_type()

        # PERF: Don't parse it if we don't have to!
        if self.query_string:
            self._params = uri.parse_query_string(self.query_string)
        else:
            self._params = {}

        helpers.normalize_headers(env)
        self._cached_headers = {}

        self._cached_uri = None
        self._cached_relative_uri = None

        self.content_type = self._get_header_by_wsgi_name('HTTP_CONTENT_TYPE')

        # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
        # normalizing semantics between, e.g., gunicorn and wsgiref.
        if isinstance(self.stream, NativeStream):  # pragma: nocover
            # NOTE(kgriffs): coverage can't detect that this *is* actually
            # covered since the test that does so uses multiprocessing.
            self.stream = helpers.Body(self.stream, self.content_length)

        # PERF(kgriffs): Technically, we should spend a few more
        # cycles and parse the content type for real, but
        # this heuristic will work virtually all the time.
        if (self.content_type and
                'application/x-www-form-urlencoded' in self.content_type):

            # NOTE(kgriffs): This assumes self.stream has been patched
            # above in the case of wsgiref, so that self.content_length
            # is not needed. Normally we just avoid accessing
            # self.content_length, because it is a little expensive
            # to call. We could cache self.content_length, but the
            # overhead to do that won't usually be helpful, since
            # content length will only ever be read once per
            # request in most cases.
            body = self.stream.read()

            # The body of a URL-encoded form should be US-ASCII.
            # Enforcing this also helps catch malicious input, and a
            # client-supplied body must not be able to make the
            # constructor raise.
            try:
                body = body.decode('ascii')
            except UnicodeDecodeError:
                body = None
                self.log_error('Non-ASCII characters found in form body '
                               'with Content-Type of '
                               'application/x-www-form-urlencoded. Body '
                               'will be ignored.')

            if body:
                extra_params = uri.parse_query_string(uri.decode(body))
                self._params.update(extra_params)
Beispiel #19
0
    def __init__(self, env):
        """Populate request attributes from a WSGI environment dict.

        Note: Responders are not meant to instantiate Request directly.

        Args:
            env: The WSGI environment dict passed in from the server.
                See also the PEP-3333 spec.

        """
        self.env = env

        self._wsgierrors = env['wsgi.errors']
        self.stream = env['wsgi.input']
        self.method = env['REQUEST_METHOD']

        # Normalize path: an empty path becomes '/', and a trailing
        # slash is stripped from anything longer than one character.
        path_info = env['PATH_INFO']
        if not path_info:
            self.path = '/'
        elif len(path_info) != 1 and path_info.endswith('/'):
            self.path = path_info[:-1]
        else:
            self.path = path_info

        # QUERY_STRING isn't required to be in env, so let's check
        # PERF: if...in is faster than using env.get(...)
        encoded_query = env['QUERY_STRING'] if 'QUERY_STRING' in env else None
        if encoded_query:
            # TODO(kgriffs): Should this escape individual values instead
            # of the entire string? The way it is now, this:
            #
            #   x=ab%2Bcd%3D42%2C9
            #
            # becomes this:
            #
            #   x=ab+cd=42,9
            #
            self.query_string = uri.decode(encoded_query)
        else:
            self.query_string = six.text_type()

        # PERF: Don't parse it if we don't have to!
        self._params = (uri.parse_query_string(self.query_string)
                        if self.query_string else {})

        helpers.normalize_headers(env)
        self._cached_headers = {}

        self._cached_uri = None
        self._cached_relative_uri = None

        # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
        # normalizing semantics between, e.g., gunicorn and wsgiref.
        if isinstance(self.stream, NativeStream):  # pragma: nocover
            # NOTE(kgriffs): coverage can't detect that this *is* actually
            # covered since the test that does so uses multiprocessing.
            self.stream = helpers.Body(self.stream, self.content_length)
Beispiel #20
0
def create_environ(path='/', query_string='', protocol='HTTP/1.1',
                   scheme='http', host=DEFAULT_HOST, port=None,
                   headers=None, app='', body='', method='GET',
                   wsgierrors=None, file_wrapper=None):

    """Creates a mock PEP-3333 environ ``dict`` for simulating WSGI requests.

    Args:
        path (str, optional): The path for the request (default '/'). It
            is percent-decoded before being stored as ``PATH_INFO``.
        query_string (str, optional): The query string to simulate, without a
            leading '?' (default '')
        protocol (str, optional): The HTTP protocol to simulate
            (default 'HTTP/1.1'). If set to 'HTTP/1.0', the Host header
            will not be added to the environment.
        scheme (str): URL scheme, either 'http' or 'https' (default 'http')
        host (str): Hostname for the request (default ``DEFAULT_HOST``)
        port (str or int, optional): The TCP port to simulate. Defaults to
            the standard port used by the given scheme (i.e., 80 for 'http'
            and 443 for 'https').
        headers (dict or list, optional): Headers as a ``dict`` or an
            iterable collection of (*key*, *value*) ``tuple``'s
        app (str): Value for the ``SCRIPT_NAME`` environ variable, described in
            PEP-333: 'The initial portion of the request URL's "path" that
            corresponds to the application object, so that the application
            knows its virtual "location". This may be an empty string, if the
            application corresponds to the "root" of the server.' (default '')
        body (str or unicode): The body of the request (default ''). Unicode
            strings are encoded as UTF-8.
        method (str): The HTTP method to use (default 'GET')
        wsgierrors (io): The stream to use as *wsgierrors*
            (default ``sys.stderr``)
        file_wrapper: Callable that returns an iterable, to be used as
            the value for *wsgi.file_wrapper* in the environ.

    Returns:
        dict: The simulated WSGI environ. ``CONTENT_LENGTH`` is only
        present for a non-empty body and is stored as a string.

    """

    body = io.BytesIO(body.encode('utf-8')
                      if isinstance(body, six.text_type) else body)

    # NOTE(kgriffs): wsgiref, gunicorn, and uWSGI all unescape
    # the paths before setting PATH_INFO
    path = uri.decode(path)

    # NOTE(kgriffs): nocover since this branch will never be
    # taken in Python3. However, the branch is tested under Py2,
    # in test_utils.TestFalconTesting.test_unicode_path_in_create_environ
    if six.PY2 and isinstance(path, six.text_type):  # pragma: nocover
        path = path.encode('utf-8')

    scheme = scheme.lower()
    if port is None:
        port = '80' if scheme == 'http' else '443'
    else:
        port = str(port)

    env = {
        'SERVER_PROTOCOL': protocol,
        'SERVER_SOFTWARE': 'gunicorn/0.17.0',
        'SCRIPT_NAME': app,
        'REQUEST_METHOD': method,
        'PATH_INFO': path,
        'QUERY_STRING': query_string,
        'HTTP_USER_AGENT': 'curl/7.24.0 (x86_64-apple-darwin12.0)',
        'REMOTE_PORT': '65133',
        'RAW_URI': '/',
        'REMOTE_ADDR': '127.0.0.1',
        'SERVER_NAME': host,
        'SERVER_PORT': port,

        # PEP 3333 lists wsgi.version among the required environ keys.
        'wsgi.version': (1, 0),
        'wsgi.url_scheme': scheme,
        'wsgi.input': body,
        'wsgi.errors': wsgierrors or sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': True,
        'wsgi.run_once': False
    }

    if file_wrapper is not None:
        env['wsgi.file_wrapper'] = file_wrapper

    if protocol != 'HTTP/1.0':
        host_header = host

        # Only spell out the port when it is not the scheme's default.
        if scheme == 'https':
            if port != '443':
                host_header += ':' + port
        else:
            if port != '80':
                host_header += ':' + port

        env['HTTP_HOST'] = host_header

    # Seeking to the end (whence=2) reports the body size; rewind so
    # the application reads from the start.
    content_length = body.seek(0, 2)
    body.seek(0)

    if content_length != 0:
        # PEP 3333 requires CGI-style environ values to be strings,
        # which is also what a real server would provide.
        env['CONTENT_LENGTH'] = str(content_length)

    if headers is not None:
        _add_headers_to_environ(env, headers)

    return env
Beispiel #21
0
 def test_uri_decode_replace_bad_unicode(self, encoded, expected):
     """Check that ``uri.decode(encoded)`` yields ``expected``.

     Both arguments are supplied from outside this test.
     """
     decoded = uri.decode(encoded)
     assert decoded == expected
Beispiel #22
0
def create_environ(path='/',
                   query_string='',
                   protocol='HTTP/1.1',
                   scheme='http',
                   host=DEFAULT_HOST,
                   port=None,
                   headers=None,
                   app='',
                   body='',
                   method='GET',
                   wsgierrors=None,
                   file_wrapper=None):
    """Creates a mock PEP-3333 environ dict for simulating WSGI requests.

    Args:
        path (str, optional): The path for the request (default '/'). It
            is percent-decoded before being stored as PATH_INFO.
        query_string (str, optional): The query string to simulate, without a
            leading '?' (default '')
        protocol (str, optional): The HTTP protocol to simulate
            (default 'HTTP/1.1'). If set to 'HTTP/1.0', the Host header
            will not be added to the environment.
        scheme (str): URL scheme, either 'http' or 'https' (default 'http')
        host (str): Hostname for the request (default ``DEFAULT_HOST``)
        port (str or int, optional): The TCP port to simulate. Defaults to
            the standard port used by the given scheme (i.e., 80 for 'http'
            and 443 for 'https').
        headers (dict or list, optional): Headers as a dict or an
            iterable collection of ``(key, value)`` tuples
        app (str): Value for the SCRIPT_NAME environ variable, described in
            PEP-333: 'The initial portion of the request URL's "path" that
            corresponds to the application object, so that the application
            knows its virtual "location". This may be an empty string, if the
            application corresponds to the "root" of the server.' (default '')
        body (str or unicode): The body of the request (default ''). Unicode
            strings are encoded as UTF-8.
        method (str): The HTTP method to use (default 'GET')
        wsgierrors (io): The stream to use as wsgierrors (default sys.stderr)
        file_wrapper: Callable that returns an iterable, to be used as
            the value for 'wsgi.file_wrapper' in the environ.

    Returns:
        dict: The simulated WSGI environ. CONTENT_LENGTH is only present
        for a non-empty body and is stored as a string.

    """

    body = io.BytesIO(
        body.encode('utf-8') if isinstance(body, six.text_type) else body)

    # NOTE(kgriffs): wsgiref, gunicorn, and uWSGI all unescape
    # the paths before setting PATH_INFO
    path = uri.decode(path)

    # NOTE(kgriffs): nocover since this branch will never be
    # taken in Python3. However, the branch is tested under Py2,
    # in test_utils.TestFalconTesting.test_unicode_path_in_create_environ
    if six.PY2 and isinstance(path, six.text_type):  # pragma: nocover
        path = path.encode('utf-8')

    scheme = scheme.lower()
    if port is None:
        port = '80' if scheme == 'http' else '443'
    else:
        port = str(port)

    env = {
        'SERVER_PROTOCOL': protocol,
        'SERVER_SOFTWARE': 'gunicorn/0.17.0',
        'SCRIPT_NAME': app,
        'REQUEST_METHOD': method,
        'PATH_INFO': path,
        'QUERY_STRING': query_string,
        'HTTP_USER_AGENT': 'curl/7.24.0 (x86_64-apple-darwin12.0)',
        'REMOTE_PORT': '65133',
        'RAW_URI': '/',
        'REMOTE_ADDR': '127.0.0.1',
        'SERVER_NAME': host,
        'SERVER_PORT': port,
        # PEP 3333 lists wsgi.version among the required environ keys.
        'wsgi.version': (1, 0),
        'wsgi.url_scheme': scheme,
        'wsgi.input': body,
        'wsgi.errors': wsgierrors or sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': True,
        'wsgi.run_once': False
    }

    if file_wrapper is not None:
        env['wsgi.file_wrapper'] = file_wrapper

    if protocol != 'HTTP/1.0':
        host_header = host

        # Only spell out the port when it is not the scheme's default.
        if scheme == 'https':
            if port != '443':
                host_header += ':' + port
        else:
            if port != '80':
                host_header += ':' + port

        env['HTTP_HOST'] = host_header

    # Seeking to the end (whence=2) reports the body size; rewind so
    # the application reads from the start.
    content_length = body.seek(0, 2)
    body.seek(0)

    if content_length != 0:
        # PEP 3333 requires CGI-style environ values to be strings,
        # which is also what a real server would provide.
        env['CONTENT_LENGTH'] = str(content_length)

    if headers is not None:
        _add_headers_to_environ(env, headers)

    return env
Beispiel #23
0
    def __init__(self, env):
        """Populate request attributes from a WSGI environment dict.

        When the content type mentions
        ``application/x-www-form-urlencoded``, the request body is read
        and its parameters are merged into the query parameters. A form
        body containing non-ASCII bytes is logged and ignored rather
        than raising ``UnicodeDecodeError`` from the constructor.

        Args:
            env: The WSGI environment dict passed in from the server.
                See also the PEP-3333 spec.

        """
        self.env = env

        if self.context_type is None:
            # Literal syntax is more efficient than using dict()
            self.context = {}
        else:
            # pylint will detect this as not-callable because it only sees the
            # declaration of None, not whatever type a subclass may have set.
            self.context = self.context_type()  # pylint: disable=not-callable

        self._wsgierrors = env['wsgi.errors']
        self.stream = env['wsgi.input']
        self.method = env['REQUEST_METHOD']

        # Normalize path: an empty path becomes '/', and a trailing
        # slash is stripped from anything longer than one character.
        path = env['PATH_INFO']
        if path:
            if len(path) != 1 and path.endswith('/'):
                self.path = path[:-1]
            else:
                self.path = path
        else:
            self.path = '/'

        # QUERY_STRING isn't required to be in env, so let's check
        # PERF: if...in is faster than using env.get(...)
        if 'QUERY_STRING' in env and env['QUERY_STRING']:

            # TODO(kgriffs): Should this escape individual values instead
            # of the entire string? The way it is now, this:
            #
            #   x=ab%2Bcd%3D42%2C9
            #
            # becomes this:
            #
            #   x=ab+cd=42,9
            #
            self.query_string = uri.decode(env['QUERY_STRING'])

        else:
            self.query_string = six.text_type()

        # PERF: Don't parse it if we don't have to!
        if self.query_string:
            self._params = uri.parse_query_string(self.query_string)
        else:
            self._params = {}

        helpers.normalize_headers(env)
        self._cached_headers = {}

        self._cached_uri = None
        self._cached_relative_uri = None

        self.content_type = self._get_header_by_wsgi_name('HTTP_CONTENT_TYPE')

        # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
        # normalizing semantics between, e.g., gunicorn and wsgiref.
        if isinstance(self.stream, NativeStream):  # pragma: nocover
            # NOTE(kgriffs): coverage can't detect that this *is* actually
            # covered since the test that does so uses multiprocessing.
            self.stream = helpers.Body(self.stream, self.content_length)

        # PERF(kgriffs): Technically, we should spend a few more
        # cycles and parse the content type for real, but
        # this heuristic will work virtually all the time.
        if (self.content_type
                and 'application/x-www-form-urlencoded' in self.content_type):

            # NOTE(kgriffs): This assumes self.stream has been patched
            # above in the case of wsgiref, so that self.content_length
            # is not needed. Normally we just avoid accessing
            # self.content_length, because it is a little expensive
            # to call. We could cache self.content_length, but the
            # overhead to do that won't usually be helpful, since
            # content length will only ever be read once per
            # request in most cases.
            body = self.stream.read()

            # The body of a URL-encoded form should be US-ASCII.
            # Enforcing this also helps catch malicious input, and a
            # client-supplied body must not be able to make the
            # constructor raise.
            try:
                body = body.decode('ascii')
            except UnicodeDecodeError:
                body = None
                self.log_error('Non-ASCII characters found in form body '
                               'with Content-Type of '
                               'application/x-www-form-urlencoded. Body '
                               'will be ignored.')

            if body:
                extra_params = uri.parse_query_string(uri.decode(body))
                self._params.update(extra_params)
Beispiel #24
0
def create_environ(path='/',
                   query_string='',
                   http_version='1.1',
                   scheme='http',
                   host=DEFAULT_HOST,
                   port=None,
                   headers=None,
                   app=None,
                   body='',
                   method='GET',
                   wsgierrors=None,
                   file_wrapper=None,
                   remote_addr=None,
                   root_path=None) -> Dict[str, Any]:
    """Creates a mock PEP-3333 environ ``dict`` for simulating WSGI requests.

    Keyword Args:
        path (str): The path for the request (default '/')
        query_string (str): The query string to simulate, without a
            leading '?' (default ''). The query string is passed as-is
            (it will not be percent-encoded). ``None`` is treated the
            same as an empty string.
        http_version (str): The HTTP version to simulate. Must be either
            '2', '2.0', '1.1', '1.0', or '1' (default '1.1'). If set to
            '1.0', the Host header will not be added to the environ.
        scheme (str): URL scheme, either 'http' or 'https' (default 'http')
        host(str): Hostname for the request (default 'falconframework.org')
        port (int): The TCP port to simulate. Defaults to
            the standard port used by the given scheme (i.e., 80 for 'http'
            and 443 for 'https'). A string may also be passed, as long as
            it can be parsed as an int.
        headers (dict): Headers as a dict-like (Mapping) object, or an
            iterable yielding a series of two-member (*name*, *value*)
            iterables. Each pair of strings provides the name and value
            for an HTTP header. If desired, multiple header values may be
            combined into a single (*name*, *value*) pair by joining the values
            with a comma when the header in question supports the list
            format (see also RFC 7230 and RFC 7231). Header names are not
            case-sensitive.
        root_path (str): Value for the ``SCRIPT_NAME`` environ variable, described in
            PEP-333: 'The initial portion of the request URL's "path" that
            corresponds to the application object, so that the application
            knows its virtual "location". This may be an empty string, if the
            application corresponds to the "root" of the server.' (default '')
        app (str): Deprecated alias for `root_path`. If both kwargs are passed,
            `root_path` takes precedence.
        body (str): The body of the request (default ''). The value will be
            encoded as UTF-8 in the WSGI environ. Alternatively, a byte string
            may be passed, in which case it will be used as-is.
        method (str): The HTTP method to use (default 'GET')
        wsgierrors (io): The stream to use as *wsgierrors*
            (default ``sys.stderr``)
        file_wrapper: Callable that returns an iterable, to be used as
            the value for *wsgi.file_wrapper* in the environ.
        remote_addr (str): Remote address for the request to use as the
            'REMOTE_ADDR' environ variable (default None)

    Returns:
        dict: The mock environ. ``RAW_URI`` holds the request target as it
        was passed in (the still-escaped `path`, plus ``'?'`` and the
        query string when there is one), while ``PATH_INFO`` holds the
        unescaped path.

    Raises:
        ValueError: If `query_string` starts with '?', or if `port` is
            given but cannot be parsed as an int.

    """

    http_version = _fixup_http_version(http_version)

    # NOTE: Treat None like '' so that QUERY_STRING is always a native
    #   string, as PEP-3333 requires for CGI-style variables.
    query_string = query_string or ''

    if query_string.startswith('?'):
        raise ValueError("query_string should not start with '?'")

    body = io.BytesIO(body.encode('utf-8') if isinstance(body, str) else body)

    # NOTE: Capture the request target before the path is unescaped below.
    #   gunicorn (which SERVER_SOFTWARE claims to be) exposes the raw
    #   request URI, query string included, under RAW_URI; hard-coding '/'
    #   made that key useless for any path other than the root.
    raw_uri = path or '/'
    if query_string:
        raw_uri += '?' + query_string

    # NOTE(kgriffs): wsgiref, gunicorn, and uWSGI all unescape
    # the paths before setting PATH_INFO
    path = uri.decode(path, unquote_plus=False)

    # NOTE(kgriffs): The decoded path may contain UTF-8 characters.
    # But according to the WSGI spec, no strings can contain chars
    # outside ISO-8859-1. Therefore, to reconcile the URI
    # encoding standard that allows UTF-8 with the WSGI spec
    # that does not, WSGI servers tunnel the string via
    # ISO-8859-1. falcon.testing.create_environ() mimics this
    # behavior, e.g.:
    #
    #   tunnelled_path = path.encode('utf-8').decode('iso-8859-1')
    #
    # falcon.Request does the following to reverse the process:
    #
    #   path = tunnelled_path.encode('iso-8859-1').decode('utf-8', 'replace')
    #
    path = path.encode('utf-8').decode('iso-8859-1')

    scheme = scheme.lower()
    if port is None:
        port = '80' if scheme == 'http' else '443'
    else:
        # NOTE(kgriffs): Running it through int() first ensures that if
        #   a string was passed, it is a valid integer.
        port = str(int(port))

    root_path = root_path or app or ''

    # NOTE(kgriffs): Judging by the algorithm given in PEP-3333 for
    # reconstructing the URL, SCRIPT_NAME is expected to contain a
    # preceding slash character.
    if root_path and not root_path.startswith('/'):
        root_path = '/' + root_path

    env = {
        'SERVER_PROTOCOL': 'HTTP/' + http_version,
        'SERVER_SOFTWARE': 'gunicorn/0.17.0',
        'SCRIPT_NAME': root_path,
        'REQUEST_METHOD': method,
        'PATH_INFO': path,
        'QUERY_STRING': query_string,
        'REMOTE_PORT': '65133',
        'RAW_URI': raw_uri,
        'SERVER_NAME': host,
        'SERVER_PORT': port,
        'wsgi.version': (1, 0),
        'wsgi.url_scheme': scheme,
        'wsgi.input': body,
        'wsgi.errors': wsgierrors or sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': True,
        'wsgi.run_once': False
    }

    # NOTE(kgriffs): It has been observed that WSGI servers do not always
    #   set the REMOTE_ADDR variable, so we don't always set it either, to
    #   ensure the framework/app handles that case correctly.
    if remote_addr:
        env['REMOTE_ADDR'] = remote_addr

    if file_wrapper is not None:
        env['wsgi.file_wrapper'] = file_wrapper

    if http_version != '1.0':
        # NOTE: The port is only spelled out in the Host header when it
        #   differs from the default; 'https' defaults to 443 and every
        #   other scheme is compared against 80.
        default_port = '443' if scheme == 'https' else '80'

        if port == default_port:
            env['HTTP_HOST'] = host
        else:
            env['HTTP_HOST'] = host + ':' + port

    # NOTE: Seeking to the end yields the body size in bytes; rewind
    #   afterwards so the app reads the stream from the start.
    content_length = body.seek(0, 2)
    body.seek(0)

    if content_length != 0:
        env['CONTENT_LENGTH'] = str(content_length)

    if headers is not None:
        _add_headers_to_environ(env, headers)

    return env
Beispiel #25
0
def create_scope(path='/',
                 query_string='',
                 method='GET',
                 headers=None,
                 host=DEFAULT_HOST,
                 scheme=None,
                 port=None,
                 http_version='1.1',
                 remote_addr=None,
                 root_path=None,
                 content_length=None,
                 include_server=True) -> Dict[str, Any]:
    """Build a mock ASGI HTTP connection scope for simulating requests.

    Keyword Args:
        path (str): Request path (default '/'). It is percent-decoded
            before being stored under 'path'.
        query_string (str): Query string without the leading '?'
            (default ''). It is encoded to bytes as-is, i.e., it is not
            percent-encoded. ``None`` is treated like ''.
        method (str): HTTP method (default 'GET'); stored upper-cased.
        headers (dict): Headers as a dict-like (Mapping) object, or an
            iterable yielding a series of two-member (*name*, *value*)
            iterables. Each pair of strings provides the name and value
            for an HTTP header. If desired, multiple header values may be
            combined into a single (*name*, *value*) pair by joining the
            values with a comma when the header in question supports the
            list format (see also RFC 7230 and RFC 7231). When the
            request will include a body, the Content-Length header should
            be included in this list. Header names are not case-sensitive.
        host (str): Hostname for the request (default
            'falconframework.org'). This also determines the value of
            the Host header in the request.
        scheme (str): URL scheme, either 'http' or 'https'. When omitted,
            no 'scheme' key is set and the port defaults as for 'http'.
        port (int): TCP port to simulate. Defaults to the standard port
            for the scheme (80 for 'http', 443 for 'https'). A string is
            accepted as long as it can be parsed as an int.
        http_version (str): The HTTP version to simulate. Must be either
            '2', '2.0', '1.1', '1.0', or '1' (default '1.1'). If set to
            '1.0', the Host header will not be added to the scope.
        remote_addr (str): Remote address to expose through the 'client'
            field of the scope (default None, i.e., no 'client' key).
        root_path (str): The root path this application is mounted at;
            same as SCRIPT_NAME in WSGI. When ``None`` (the default), no
            'root_path' key is set.
        content_length (int): The expected content length of the request
            body (default ``None``). If specified, this value will be
            used to set the Content-Length header in the request.
        include_server (bool): Set to ``False`` to leave the 'server' key
            out of the scope (default ``True``).

    Returns:
        dict: The mock connection scope.

    Raises:
        ValueError: If `query_string` starts with '?', if `scheme` is
            given but is neither 'http' nor 'https', or if `port` cannot
            be parsed as an int.
    """

    version = _fixup_http_version(http_version)
    decoded_path = uri.decode(path, unquote_plus=False)

    # NOTE: A falsy query string (None or '') becomes an empty byte
    #   string, which can never start with b'?'.
    raw_query = query_string.encode() if query_string else b''
    if raw_query.startswith(b'?'):
        raise ValueError("query_string should not start with '?'")

    scope = {
        'type': 'http',
        'asgi': {'version': '3.0', 'spec_version': '2.1'},
        'http_version': version,
        'method': method.upper(),
        'path': decoded_path,
        'query_string': raw_query,
    }

    # NOTE: Only None means "omit the key"; an explicit empty string is
    #   kept so that callers can exercise that branch of their code.
    if root_path is not None:
        # NOTE: ASGI defines root_path as equivalent to WSGI's
        #   SCRIPT_NAME, which PEP-3333's URL reconstruction algorithm
        #   expects to begin with a slash; add one when it is missing.
        if not root_path or root_path.startswith('/'):
            scope['root_path'] = root_path
        else:
            scope['root_path'] = '/' + root_path

    if scheme and scheme not in ('http', 'https'):
        raise ValueError("scheme must be either 'http' or 'https'")

    if scheme:
        scope['scheme'] = scheme

    if port is not None:
        server_port = int(port)
    elif scheme == 'https':
        server_port = 443
    else:
        # NOTE: Covers both an explicit 'http' and an omitted scheme.
        server_port = 80

    if remote_addr:
        # NOTE: The simulated client port is drawn from the standard
        #   IANA dynamic range. The pair is handed over as a plain
        #   iterator so that an app cannot rely on getting a list or
        #   tuple.
        client_port = random.randint(49152, 65535)
        scope['client'] = iter([remote_addr, client_port])

    if include_server:
        scope['server'] = iter([host, server_port])

    _add_headers_to_scope(scope, headers, content_length, host, server_port,
                          scheme, version)

    return scope
Beispiel #26
0
    def __init__(self, env, options=None):
        """Populate the request from a WSGI environ ``dict``.

        Args:
            env (dict): WSGI environ for the request. The keys
                'wsgi.errors', 'wsgi.input', 'REQUEST_METHOD' and
                'PATH_INFO' are read unconditionally; 'QUERY_STRING'
                and 'CONTENT_TYPE' are optional.
            options: Options object for the request. When falsy, a new
                ``RequestOptions()`` is created instead.
        """
        global _maybe_wrap_wsgi_stream

        self.env = env
        self.options = options or RequestOptions()

        # NOTE: The literal {} is cheaper than dict(). pylint reports
        # not-callable below because it only sees the declaration of None,
        # not whatever type a subclass may have assigned to context_type.
        self.context = (
            {} if self.context_type is None
            else self.context_type()  # pylint: disable=not-callable
        )

        self._wsgierrors = env['wsgi.errors']
        self.stream = env['wsgi.input']
        self.method = env['REQUEST_METHOD']

        # An empty path becomes '/', and a single trailing slash is
        # dropped from anything longer than one character.
        raw_path = env['PATH_INFO']
        if not raw_path:
            self.path = '/'
        elif len(raw_path) != 1 and raw_path.endswith('/'):
            self.path = raw_path[:-1]
        else:
            self.path = raw_path

        self._params = {}

        # PERF(kgriffs): if...in is faster than using env.get(...)
        raw_query = env['QUERY_STRING'] if 'QUERY_STRING' in env else None

        if raw_query:
            self.query_string = uri.decode(raw_query)
            self._params = uri.parse_query_string(
                self.query_string,
                keep_blank_qs_values=self.options.keep_blank_qs_values,
            )
        else:
            # A missing and an empty QUERY_STRING are handled alike.
            self.query_string = six.text_type()

        self._cached_headers = None
        self._cached_uri = None
        self._cached_relative_uri = None

        self.content_type = (
            env['CONTENT_TYPE'] if 'CONTENT_TYPE' in env else None
        )

        # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
        # normalizing semantics between, e.g., gunicorn and wsgiref.
        if _maybe_wrap_wsgi_stream:
            if not isinstance(self.stream, NativeStream):
                # PERF(kgriffs): If self.stream does not need to be wrapped
                # this time, it never needs to be wrapped since the server
                # will continue using the same type for wsgi.input.
                _maybe_wrap_wsgi_stream = False
            else:
                # NOTE(kgriffs): This is covered by tests, it's just that
                # coverage can't figure this out for some reason (TBD).
                self._wrap_stream()  # pragma nocover

        # PERF(kgriffs): A substring check stands in for real content-type
        # parsing; this heuristic will work virtually all the time.
        media_type = self.content_type
        if (media_type is not None
                and 'application/x-www-form-urlencoded' in media_type):
            self._parse_form_urlencoded()
Beispiel #27
0
    def __init__(self, env):
        """Populate the request from a WSGI environ ``dict``.

        Args:
            env (dict): WSGI environ for the request. The keys
                'wsgi.errors', 'wsgi.input', 'REQUEST_METHOD' and
                'PATH_INFO' are read unconditionally; 'QUERY_STRING'
                is optional. The dict is also passed to
                ``helpers.normalize_headers()``.
        """
        self.env = env

        # NOTE: The literal {} is cheaper than dict(). pylint reports
        # not-callable below because it only sees the declaration of None,
        # not whatever type a subclass may have assigned to context_type.
        self.context = (
            {} if self.context_type is None
            else self.context_type()  # pylint: disable=not-callable
        )

        self._wsgierrors = env['wsgi.errors']
        self.stream = env['wsgi.input']
        self.method = env['REQUEST_METHOD']

        # An empty path becomes '/', and a single trailing slash is
        # dropped from anything longer than one character.
        raw_path = env['PATH_INFO']
        if not raw_path:
            self.path = '/'
        elif len(raw_path) != 1 and raw_path.endswith('/'):
            self.path = raw_path[:-1]
        else:
            self.path = raw_path

        # QUERY_STRING isn't required to be in env, so check for it first.
        # PERF: if...in is faster than using env.get(...)
        raw_query = env['QUERY_STRING'] if 'QUERY_STRING' in env else None

        if raw_query:
            # TODO(kgriffs): Should this escape individual values instead
            # of the entire string? The way it is now, this:
            #
            #   x=ab%2Bcd%3D42%2C9
            #
            # becomes this:
            #
            #   x=ab+cd=42,9
            #
            self.query_string = uri.decode(raw_query)
        else:
            self.query_string = six.text_type()

        # PERF: Don't parse it if we don't have to!
        self._params = (
            uri.parse_query_string(self.query_string)
            if self.query_string else {}
        )

        helpers.normalize_headers(env)
        self._cached_headers = {}

        self._cached_uri = None
        self._cached_relative_uri = None

        self.content_type = self._get_header_by_wsgi_name('HTTP_CONTENT_TYPE')

        # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
        # normalizing semantics between, e.g., gunicorn and wsgiref.
        if isinstance(self.stream, NativeStream):  # pragma: nocover
            # NOTE(kgriffs): coverage can't detect that this *is* actually
            # covered since the test that does so uses multiprocessing.
            wrapped = helpers.Body(self.stream, self.content_length)
            self.stream = wrapped

        # Called unconditionally; any content-type check is left to the
        # method itself.
        self._parse_form_urlencoded()
Beispiel #28
0
    def __init__(self, env, options=None):
        """Set up the request object from a WSGI environ ``dict``.

        Args:
            env (dict): WSGI environ describing the request. It must
                provide 'wsgi.errors', 'wsgi.input', 'REQUEST_METHOD'
                and 'PATH_INFO'; 'QUERY_STRING' and 'CONTENT_TYPE' are
                only used when present.
            options: Request options. A falsy value (including the
                default ``None``) results in a new ``RequestOptions()``.
        """
        global _maybe_wrap_wsgi_stream

        self.env = env
        self.options = options or RequestOptions()

        if self.context_type is not None:
            # pylint will detect this as not-callable because it only sees
            # the declaration of None, not whatever type a subclass may
            # have set.
            self.context = self.context_type()  # pylint: disable=not-callable
        else:
            # Literal syntax is more efficient than using dict()
            self.context = {}

        self._wsgierrors = env['wsgi.errors']
        self.stream = env['wsgi.input']
        self.method = env['REQUEST_METHOD']

        # Normalize the path: fall back to '/' when it is empty, and
        # strip one trailing slash unless the path is a single character.
        path_info = env['PATH_INFO']
        if not path_info:
            self.path = '/'
        elif len(path_info) != 1 and path_info.endswith('/'):
            self.path = path_info[:-1]
        else:
            self.path = path_info

        self._params = {}

        # PERF(kgriffs): if...in is faster than using env.get(...)
        query_str = env['QUERY_STRING'] if 'QUERY_STRING' in env else None

        if not query_str:
            # Covers both an absent and an empty QUERY_STRING.
            self.query_string = six.text_type()
        else:
            self.query_string = uri.decode(query_str)
            self._params = uri.parse_query_string(
                self.query_string,
                keep_blank_qs_values=self.options.keep_blank_qs_values,
            )

        self._cached_headers = None
        self._cached_uri = None
        self._cached_relative_uri = None

        if 'CONTENT_TYPE' in env:
            self.content_type = env['CONTENT_TYPE']
        else:
            self.content_type = None

        # NOTE(kgriffs): Wrap wsgi.input if needed to make read() more robust,
        # normalizing semantics between, e.g., gunicorn and wsgiref.
        if _maybe_wrap_wsgi_stream:
            if not isinstance(self.stream, NativeStream):
                # PERF(kgriffs): If self.stream does not need to be wrapped
                # this time, it never needs to be wrapped since the server
                # will continue using the same type for wsgi.input.
                _maybe_wrap_wsgi_stream = False
            else:
                # NOTE(kgriffs): This is covered by tests, it's just that
                # coverage can't figure this out for some reason (TBD).
                self._wrap_stream()  # pragma nocover

        # PERF(kgriffs): Technically, we should spend a few more
        # cycles and parse the content type for real, but
        # this substring heuristic will work virtually all the time.
        declared_type = self.content_type
        if (declared_type is not None and
                'application/x-www-form-urlencoded' in declared_type):
            self._parse_form_urlencoded()