Python heuristic_decode 예제들, sentry.utils.http.heuristic_decode Python 예제들

예제 #1

0

파일 보기

파일: tests.py 프로젝트: alshopov/sentry

    def test_possible_type_mismatch(self):
        """The reported content type follows the decoded body, not the hint."""
        form_type = 'application/x-www-form-urlencoded'
        json_type = 'application/json'

        # `self.json_body` paired with a form-encoded hint.
        decoded, detected_type = heuristic_decode(self.json_body, form_type)
        assert decoded == {'key': 'value', 'key2': 'value2'}
        assert detected_type == json_type

        # `self.url_body` paired with a JSON hint.
        decoded, detected_type = heuristic_decode(self.url_body, json_type)
        assert decoded == {'key': ['value'], 'key2': ['value2']}
        assert detected_type == form_type

예제 #2

0

파일 보기

    def test_possible_type_mismatch(self):
        """A mismatching hint does not stop the body from being decoded."""
        # Hint says form-encoded; the expected result is the JSON mapping.
        expected_from_json = {"key": "value", "key2": "value2"}
        parsed, kind = heuristic_decode(
            self.json_body, "application/x-www-form-urlencoded"
        )
        assert parsed == expected_from_json
        assert kind == "application/json"

        # Hint says JSON; the expected result maps each key to a list.
        expected_from_form = {"key": ["value"], "key2": ["value2"]}
        parsed, kind = heuristic_decode(self.url_body, "application/json")
        assert parsed == expected_from_form
        assert kind == "application/x-www-form-urlencoded"

예제 #3

0

파일 보기

파일: tests.py 프로젝트: webZW/sentry

    def test_possible_type_mismatch(self):
        """Check both directions of a swapped content-type hint."""
        # json_body + form hint -> flat mapping, reported as JSON.
        payload, mime = heuristic_decode(self.json_body, 'application/x-www-form-urlencoded')
        assert payload == {'key': 'value', 'key2': 'value2'}
        assert mime == 'application/json'

        # url_body + JSON hint -> mapping of lists, reported as form-encoded.
        payload, mime = heuristic_decode(self.url_body, 'application/json')
        assert payload == {'key': ['value'], 'key2': ['value2']}
        assert mime == 'application/x-www-form-urlencoded'

예제 #4

0

파일 보기

파일: http.py 프로젝트: p22p/monitor-and-fix-crashes-in-realtime

    def to_python(cls, data):
        """Normalize a raw HTTP request payload and construct ``cls`` from it.

        Reads the ``method``, ``url``, ``query_string``, ``fragment``,
        ``cookies``, ``headers``, ``data``, ``env`` and
        ``inferred_content_type`` keys of ``data``. ``data['url']`` is
        accessed unconditionally.

        :param data: mapping holding the request fields.
        :returns: ``cls(**kwargs)`` built from the normalized fields.
        :raises InterfaceValidationError: if
            ``validate_and_default_interface`` reports the payload as invalid,
            or if ``method`` is neither GET/POST nor a match for
            ``http_method_re``.

        NOTE(review): takes ``cls``, so presumably decorated with
        ``@classmethod`` outside this snippet -- confirm.
        """
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET',
                              'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])

        # An explicit query string wins over the one embedded in the URL.
        query_string = data.get('query_string') or query_bit
        if query_string:
            # if querystring was a dict, convert it to a string
            if isinstance(query_string, dict):
                query_string = urlencode([(to_bytes(k), to_bytes(v))
                                          for k, v in query_string.items()])
            else:
                if query_string[0] == '?':
                    # remove '?' prefix
                    query_string = query_string[1:]
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = ''

        # Likewise, an explicit fragment wins over the URL's fragment.
        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        headers = data.get('headers')
        if headers:
            headers, cookie_header = format_headers(headers)
            # Only fall back to the Cookie header when no explicit cookies
            # were supplied.
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        # The header name is matched exactly (case-sensitive) against
        # 'Content-Type'; the first match wins.
        content_type = next((v for k, v in headers if k == 'Content-Type'),
                            None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid decoding the
        # body a second time.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                # Drop the address when validate_ip rejects it. When `env`
                # came from `data`, this mutates the caller's dict in place.
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        # The stored URL is rebuilt without its query and fragment parts,
        # which are kept in their own fields.
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)

예제 #5

0

파일 보기

파일: http.py 프로젝트: rubiruchi/sentry

    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        """Normalize a raw HTTP request payload and construct ``cls`` from it.

        When ``rust_renormalized`` is truthy, no normalization is done here:
        missing keys are filled in on ``data`` in place (``query_string``
        defaults to ``[]``, the other listed keys to ``None``) and
        ``cls(**data)`` is returned directly.

        Otherwise the ``method``, ``url``, ``query_string``, ``fragment``,
        ``cookies``, ``headers``, ``data``, ``env`` and
        ``inferred_content_type`` keys of ``data`` are validated, normalized
        and passed to ``cls``.

        :param data: mapping holding the request fields.
        :param rust_renormalized: skip the Python-side normalization when
            truthy.
        :returns: an instance of ``cls``.
        :raises InterfaceValidationError: if
            ``validate_and_default_interface`` reports the payload as invalid,
            or if ``method`` is neither GET/POST nor a match for
            ``http_method_re``.

        NOTE(review): takes ``cls``, so presumably decorated with
        ``@classmethod`` outside this snippet -- confirm.
        """
        if rust_renormalized:
            data.setdefault('query_string', [])
            for key in (
                    "method",
                    "url",
                    "fragment",
                    "cookies",
                    "headers",
                    "data",
                    "env",
                    "inferred_content_type",
            ):
                data.setdefault(key, None)
            return cls(**data)

        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET',
                              'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            url = to_unicode(data['url'])
            # The JavaScript SDK used to send an ellipsis character for
            # truncated URLs. Canonical URLs do not contain UTF-8 characters in
            # either the path, query string or fragment, so we replace it with
            # three dots (which is the behavior of other SDKs). This effectively
            # makes the string two characters longer, but it will be trimmed
            # again down below.
            if url.endswith(u"\u2026"):
                url = url[:-1] + "..."
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        # An explicit query string wins over the one embedded in the URL.
        query_string = data.get('query_string') or query_bit
        if query_string:
            # Every accepted input shape is converted to a list of pairs.
            if isinstance(query_string, six.string_types):
                if query_string[0] == '?':
                    query_string = query_string[1:]
                # Same ellipsis replacement as for the URL above.
                if query_string.endswith(u"\u2026"):
                    query_string = query_string[:-1] + "..."
                query_string = [
                    (to_unicode(k), jsonify(v))
                    for k, v in parse_qsl(query_string, keep_blank_values=True)
                ]
            elif isinstance(query_string, dict):
                query_string = [(to_unicode(k), jsonify(v))
                                for k, v in six.iteritems(query_string)]
            elif isinstance(query_string, list):
                # Keep only well-formed two-element entries.
                query_string = [
                    tuple(tup) for tup in query_string
                    if isinstance(tup, (tuple, list)) and len(tup) == 2
                ]
            else:
                query_string = []
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = []

        # Likewise, an explicit fragment wins over the URL's fragment.
        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        if data.get("headers"):
            headers, cookie_header = format_headers(
                get_path(data, "headers", filter=True))
            # Only fall back to the Cookie header when no explicit cookies
            # were supplied.
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        # The header name is matched exactly (case-sensitive) against
        # 'Content-Type'; the first match wins.
        content_type = next((v for k, v in headers if k == 'Content-Type'),
                            None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid decoding the
        # body a second time.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                # Drop the address when validate_ip rejects it. When `env`
                # came from `data`, this mutates the caller's dict in place.
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        # The stored URL is rebuilt without its query and fragment parts.
        # NOTE(review): when 'url' is absent, scheme/netloc/path are None
        # here -- confirm urlunsplit handles that as intended.
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)

예제 #6

0

파일 보기

파일: tests.py 프로젝트: webZW/sentry

 def test_unable_to_decode(self):
     """A plain-text body comes back unchanged with the type passed in."""
     raw_body = 'string body'
     hinted_type = 'text/plain'
     result, result_type = heuristic_decode(raw_body, hinted_type)
     assert result == raw_body
     assert result_type == hinted_type

예제 #7

0

파일 보기

파일: tests.py 프로젝트: webZW/sentry

 def test_no_possible_type(self):
     """Without a content-type hint, `self.json_body` is reported as JSON."""
     expected = {'key': 'value', 'key2': 'value2'}
     decoded, detected_type = heuristic_decode(self.json_body)
     assert decoded == expected
     assert detected_type == 'application/json'

예제 #8

0

파일 보기

파일: tests.py 프로젝트: webZW/sentry

 def test_url_encoded(self):
     """A form-encoded hint on `self.url_body` yields a mapping of lists."""
     form_type = 'application/x-www-form-urlencoded'
     decoded, detected_type = heuristic_decode(self.url_body, form_type)
     assert decoded == {'key': ['value'], 'key2': ['value2']}
     assert detected_type == form_type

예제 #9

0

파일 보기

파일: tests.py 프로젝트: waterdrops/sentry

 def test_unable_to_decode(self):
     """The body and content type pass through untouched for plain text."""
     # Call with the literals inline, then compare against named expectations.
     payload, mime = heuristic_decode("string body", "text/plain")
     expected_payload = "string body"
     expected_mime = "text/plain"
     assert payload == expected_payload
     assert mime == expected_mime

예제 #10

0

파일 보기

파일: tests.py 프로젝트: waterdrops/sentry

 def test_no_possible_type(self):
     """Calling with only a body still produces a JSON result."""
     # No second argument: heuristic_decode gets no content-type hint.
     payload, mime = heuristic_decode(self.json_body)
     assert payload == {"key": "value", "key2": "value2"}
     assert mime == "application/json"

예제 #11

0

파일 보기

파일: tests.py 프로젝트: waterdrops/sentry

 def test_url_encoded(self):
     """The form-encoded fixture decodes to lists and keeps its type."""
     expected_fields = {"key": ["value"], "key2": ["value2"]}
     payload, mime = heuristic_decode(self.url_body, "application/x-www-form-urlencoded")
     assert payload == expected_fields
     assert mime == "application/x-www-form-urlencoded"

예제 #12

0

파일 보기

파일: tests.py 프로젝트: alshopov/sentry

 def test_unable_to_decode(self):
     """heuristic_decode hands back a plain string and its type as given."""
     text = 'string body'
     parsed, kind = heuristic_decode(text, 'text/plain')
     # Neither the body nor the content type is altered.
     assert parsed == text
     assert kind == 'text/plain'

예제 #13

0

파일 보기

파일: tests.py 프로젝트: alshopov/sentry

 def test_no_possible_type(self):
     """With no hint supplied, the result is a flat mapping typed as JSON."""
     parsed, kind = heuristic_decode(self.json_body)
     expected_mapping = {'key': 'value', 'key2': 'value2'}
     expected_kind = 'application/json'
     assert parsed == expected_mapping
     assert kind == expected_kind

예제 #14

0

파일 보기

파일: tests.py 프로젝트: alshopov/sentry

 def test_url_encoded(self):
     """A matching form-encoded hint is echoed back with the parsed fields."""
     parsed, kind = heuristic_decode(
         self.url_body,
         'application/x-www-form-urlencoded',
     )
     # Every value is wrapped in a list.
     assert parsed == {'key': ['value'], 'key2': ['value2']}
     assert kind == 'application/x-www-form-urlencoded'

예제 #15

0

파일 보기

파일: http.py 프로젝트: NuttasitBoonwat/sentry

    def to_python(cls, data):
        """Normalize a raw HTTP request payload and construct ``cls`` from it.

        Reads the ``url``, ``method``, ``query_string``, ``fragment``,
        ``cookies``, ``headers``, ``data``, ``env`` and
        ``inferred_content_type`` keys of ``data``.

        :param data: mapping holding the request fields; ``url`` is required.
        :returns: ``cls(**kwargs)`` built from the normalized fields.
        :raises InterfaceValidationError: if ``url`` is missing or empty, or
            if ``method`` is neither GET/POST nor a match for
            ``http_method_re``.

        NOTE(review): takes ``cls``, so presumably decorated with
        ``@classmethod`` outside this snippet -- confirm.
        """
        if not data.get('url'):
            raise InterfaceValidationError("No value for 'url'")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET', 'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])

        # An explicit query string wins over the one embedded in the URL.
        query_string = data.get('query_string') or query_bit
        if query_string:
            # if querystring was a dict, convert it to a string
            if isinstance(query_string, dict):
                query_string = urlencode(
                    [(to_bytes(k), to_bytes(v)) for k, v in query_string.items()]
                )
            elif query_string[0] == '?':
                # remove '?' prefix
                query_string = query_string[1:]
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = ''

        # Likewise, an explicit fragment wins over the URL's fragment.
        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        headers = data.get('headers')
        if headers:
            headers, cookie_header = format_headers(headers)
            # Only fall back to the Cookie header when no explicit cookies
            # were supplied.
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        # The header name is matched exactly (case-sensitive) against
        # 'Content-Type'; the first match wins.
        content_type = next((v for k, v in headers if k == 'Content-Type'), None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid decoding the
        # body a second time.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(data.get('env') or {})
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        # The stored URL is rebuilt without its query and fragment parts,
        # which are kept in their own fields.
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)

예제 #16

0

파일 보기

파일: http.py 프로젝트: getsentry/sentry

    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        """Normalize a raw HTTP request payload and construct ``cls`` from it.

        When ``rust_renormalized`` is truthy, no normalization is done here:
        missing keys are filled in on ``data`` in place (``query_string``
        defaults to ``[]``, the other listed keys to ``None``) and
        ``cls(**data)`` is returned directly.

        Otherwise the ``method``, ``url``, ``query_string``, ``fragment``,
        ``cookies``, ``headers``, ``data``, ``env`` and
        ``inferred_content_type`` keys of ``data`` are validated, normalized
        and passed to ``cls``.

        :param data: mapping holding the request fields.
        :param rust_renormalized: skip the Python-side normalization when
            truthy.
        :returns: an instance of ``cls``.
        :raises InterfaceValidationError: if
            ``validate_and_default_interface`` reports the payload as invalid,
            or if ``method`` is neither GET/POST nor a match for
            ``http_method_re``.

        NOTE(review): takes ``cls``, so presumably decorated with
        ``@classmethod`` outside this snippet -- confirm.
        """
        if rust_renormalized:
            data.setdefault('query_string', [])
            for key in (
                "method",
                "url",
                "fragment",
                "cookies",
                "headers",
                "data",
                "env",
                "inferred_content_type",
            ):
                data.setdefault(key, None)
            return cls(**data)

        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET', 'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            url = to_unicode(data['url'])
            # The JavaScript SDK used to send an ellipsis character for
            # truncated URLs. Canonical URLs do not contain UTF-8 characters in
            # either the path, query string or fragment, so we replace it with
            # three dots (which is the behavior of other SDKs). This effectively
            # makes the string two characters longer, but it will be trimmed
            # again down below.
            if url.endswith(u"\u2026"):
                url = url[:-1] + "..."
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        # An explicit query string wins over the one embedded in the URL.
        query_string = data.get('query_string') or query_bit
        if query_string:
            # Every accepted input shape is converted to a list of pairs.
            if isinstance(query_string, six.string_types):
                if query_string[0] == '?':
                    query_string = query_string[1:]
                # Same ellipsis replacement as for the URL above.
                if query_string.endswith(u"\u2026"):
                    query_string = query_string[:-1] + "..."
                query_string = [
                    (to_unicode(k), jsonify(v))
                    for k, v in parse_qsl(query_string, keep_blank_values=True)
                ]
            elif isinstance(query_string, dict):
                query_string = [(to_unicode(k), jsonify(v)) for k, v in six.iteritems(query_string)]
            elif isinstance(query_string, list):
                # Keep only well-formed two-element entries.
                query_string = [
                    tuple(tup) for tup in query_string
                    if isinstance(tup, (tuple, list)) and len(tup) == 2
                ]
            else:
                query_string = []
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = []

        # Likewise, an explicit fragment wins over the URL's fragment.
        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        if data.get("headers"):
            headers, cookie_header = format_headers(get_path(data, "headers", filter=True))
            # Only fall back to the Cookie header when no explicit cookies
            # were supplied.
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        # The header name is matched exactly (case-sensitive) against
        # 'Content-Type'; the first match wins.
        content_type = next((v for k, v in headers if k == 'Content-Type'), None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid decoding the
        # body a second time.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                # Drop the address when validate_ip rejects it. When `env`
                # came from `data`, this mutates the caller's dict in place.
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        # The stored URL is rebuilt without its query and fragment parts.
        # NOTE(review): when 'url' is absent, scheme/netloc/path are None
        # here -- confirm urlunsplit handles that as intended.
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)