Esempio n. 1
0
    def to_python(cls, data):
        """
        Build an instance of this interface from a raw ``data`` mapping.

        ``data`` must carry a non-empty ``url``. The optional keys ``method``,
        ``query_string``, ``fragment``, ``cookies``, ``headers``, ``data`` and
        ``env`` are normalized and handed to ``cls`` as keyword arguments.

        Raises ``InterfaceValidationError`` when ``url`` is missing or empty,
        or when the upper-cased ``method`` is not in ``HTTP_METHODS``.
        """
        raw_url = data.get('url')
        if not raw_url:
            raise InterfaceValidationError("No value for 'url'")

        method = data.get('method')
        if method:
            method = method.upper()
            if method not in HTTP_METHODS:
                raise InterfaceValidationError("Invalid value for 'method'")
        else:
            method = None

        scheme, netloc, path, url_query, url_fragment = urlsplit(raw_url)

        # An explicit query string takes precedence over the one embedded in
        # the URL itself.
        query = data.get('query_string') or url_query
        if not query:
            query = ''
        else:
            if isinstance(query, dict):
                # Mappings are serialized into a single encoded string.
                pairs = [(to_bytes(key), to_bytes(value))
                         for key, value in query.items()]
                query = urlencode(pairs)
            elif query[0] == '?':
                # Drop the leading '?' separator.
                query = query[1:]
            query = trim(query, 4096)

        # Same precedence rule for the fragment.
        fragment = data.get('fragment') or url_fragment

        cookies = data.get('cookies')
        raw_headers = data.get('headers')
        if not raw_headers:
            headers = ()
        else:
            # The second value returned by format_headers is only used as a
            # fallback when no explicit cookies were supplied.
            headers, header_cookies = format_headers(raw_headers)
            if header_cookies and not cookies:
                cookies = header_cookies

        body = data.get('data')
        if isinstance(body, dict):
            # Dict bodies are stored in their JSON-serialized form.
            body = json.dumps(body)
        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        # The stored URL deliberately carries neither query nor fragment;
        # both are passed as separate fields.
        return cls(
            method=method,
            query_string=query,
            cookies=trim_pairs(format_cookies(cookies)),
            env=trim_dict(data.get('env') or {}),
            headers=trim_pairs(headers),
            data=body,
            url=urlunsplit((scheme, netloc, path, '', '')),
            fragment=trim(fragment, 1024),
        )
Esempio n. 2
0
    def to_python(cls, data):
        """
        Convert a raw ``data`` mapping into an instance of this interface.

        A non-empty ``url`` is required. ``method``, ``query_string``,
        ``fragment``, ``cookies``, ``headers``, ``data`` and ``env`` are
        optional; each is normalized before being passed to ``cls``. The body
        is additionally run through ``fix_broken_encoding``.

        Raises ``InterfaceValidationError`` when ``url`` is missing or empty,
        or when the upper-cased ``method`` is not in ``HTTP_METHODS``.
        """
        target = data.get('url')
        if not target:
            raise InterfaceValidationError("No value for 'url'")

        verb = data.get('method')
        if not verb:
            verb = None
        else:
            verb = verb.upper()
            if method_is_unknown(verb) if False else verb not in HTTP_METHODS:
                raise InterfaceValidationError("Invalid value for 'method'")

        scheme, netloc, path, split_query, split_fragment = urlsplit(target)

        # Prefer the explicitly supplied query string; fall back to the one
        # parsed out of the URL.
        qs = data.get('query_string') or split_query
        if qs:
            if isinstance(qs, dict):
                # A dict query string is flattened to an encoded string.
                qs = urlencode(
                    [(to_bytes(name), to_bytes(val)) for name, val in qs.items()]
                )
            elif qs[0] == '?':
                # Strip the '?' prefix.
                qs = qs[1:]
            qs = trim(qs, 4096)
        else:
            qs = ''

        frag = data.get('fragment') or split_fragment

        jar = data.get('cookies')
        header_input = data.get('headers')
        if header_input:
            # Cookies reported alongside the headers only apply when the
            # payload carried no explicit cookies.
            header_list, cookie_from_headers = format_headers(header_input)
            if cookie_from_headers and not jar:
                jar = cookie_from_headers
        else:
            header_list = ()

        payload = data.get('data')
        if isinstance(payload, dict):
            payload = json.dumps(payload)
        if payload:
            payload = trim(payload, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        # Query and fragment are stored as separate fields, so the URL is
        # rebuilt without them.
        params = {
            'method': verb,
            'query_string': qs,
            'cookies': trim_pairs(format_cookies(jar)),
            'env': trim_dict(data.get('env') or {}),
            'headers': trim_pairs(header_list),
            'data': fix_broken_encoding(payload),
            'url': urlunsplit((scheme, netloc, path, '', '')),
            'fragment': trim(frag, 1024),
        }
        return cls(**params)
    def to_python(cls, data):
        """
        Build an instance of this interface from a raw ``data`` mapping.

        ``data`` is first checked with
        ``validate_and_default_interface(data, cls.path)``. The method, URL,
        query string, fragment, cookies, headers, body and environment are
        then normalized and passed to ``cls`` as keyword arguments.

        Raises ``InterfaceValidationError`` when validation fails, or when the
        upper-cased ``method`` is neither ``GET``/``POST`` nor a match for
        ``http_method_re``.
        """
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET',
                              'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])

        # An explicit query string wins over the one embedded in the URL.
        query_string = data.get('query_string') or query_bit
        if query_string:
            # if querystring was a dict, convert it to a string
            if isinstance(query_string, dict):
                query_string = urlencode([(to_bytes(k), to_bytes(v))
                                          for k, v in query_string.items()])
            else:
                if query_string[0] == '?':
                    # remove '?' prefix
                    query_string = query_string[1:]
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = ''

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        headers = data.get('headers')
        if headers:
            headers, cookie_header = format_headers(headers)
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'),
                            None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid decoding the
        # body a second time when an inferred content type is already stored.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        # `data.get('env', {})` would hand back None for a payload that has
        # the key with a null value, and the membership test below would then
        # raise TypeError. `or {}` covers both the missing and the null case.
        env = data.get('env') or {}
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                # Drop an address that does not validate instead of rejecting
                # the whole interface.
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        # Query and fragment are stored separately, so the URL is rebuilt
        # without them.
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
Esempio n. 4
0
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        """
        Build an instance of this interface from a raw ``data`` mapping.

        When ``rust_renormalized`` is truthy, missing keys are filled in on
        ``data`` in place (``query_string`` defaults to ``[]``, every other
        known key to ``None``) and ``cls`` is constructed straight from
        ``data`` with no further processing.

        Otherwise ``data`` is checked with
        ``validate_and_default_interface(data, cls.path)`` and the method,
        URL, query string, fragment, cookies, headers, body and environment
        are normalized before being passed to ``cls``.

        Raises ``InterfaceValidationError`` when validation fails, or when the
        upper-cased ``method`` is neither ``GET``/``POST`` nor a match for
        ``http_method_re``.
        """
        if rust_renormalized:
            data.setdefault('query_string', [])
            for key in (
                    "method",
                    "url",
                    "fragment",
                    "cookies",
                    "headers",
                    "data",
                    "env",
                    "inferred_content_type",
            ):
                data.setdefault(key, None)
            return cls(**data)

        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET',
                              'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            url = to_unicode(data['url'])
            # The JavaScript SDK used to send an ellipsis character for
            # truncated URLs. Canonical URLs do not contain UTF-8 characters in
            # either the path, query string or fragment, so we replace it with
            # three dots (which is the behavior of other SDKs). This effectively
            # makes the string two characters longer, but it will be trimmed
            # again down below.
            if url.endswith(u"\u2026"):
                url = url[:-1] + "..."
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        # An explicit query string wins over the one embedded in the URL. It
        # is normalized to a list of (key, value) pairs whatever its input
        # shape.
        query_string = data.get('query_string') or query_bit
        if query_string:
            if isinstance(query_string, six.string_types):
                if query_string[0] == '?':
                    query_string = query_string[1:]
                # Same ellipsis replacement as for the URL above.
                if query_string.endswith(u"\u2026"):
                    query_string = query_string[:-1] + "..."
                query_string = [
                    (to_unicode(k), jsonify(v))
                    for k, v in parse_qsl(query_string, keep_blank_values=True)
                ]
            elif isinstance(query_string, dict):
                query_string = [(to_unicode(k), jsonify(v))
                                for k, v in six.iteritems(query_string)]
            elif isinstance(query_string, list):
                # Keep only well-formed two-item entries.
                query_string = [
                    tuple(tup) for tup in query_string
                    if isinstance(tup, (tuple, list)) and len(tup) == 2
                ]
            else:
                query_string = []
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = []

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        if data.get("headers"):
            headers, cookie_header = format_headers(
                get_path(data, "headers", filter=True))
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'),
                            None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid decoding the
        # body a second time when an inferred content type is already stored.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        # 'env' can be present with a None value (the rust_renormalized branch
        # above stores exactly that). `data.get('env', {})` would return that
        # None and the membership test below would raise TypeError, so fall
        # back to an empty dict for both the missing and the null case.
        env = data.get('env') or {}
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                # Drop an address that does not validate instead of rejecting
                # the whole interface.
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        # Query and fragment are stored separately, so the URL is rebuilt
        # without them.
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
Esempio n. 5
0
    def to_python(cls, data):
        """
        Build an instance of this interface from a raw ``data`` mapping.

        A non-empty ``url`` is required. ``method``, ``query_string``,
        ``fragment``, ``cookies``, ``headers``, ``data``, ``env`` and
        ``inferred_content_type`` are optional and are normalized before
        being passed to ``cls`` as keyword arguments.

        Raises ``InterfaceValidationError`` when ``url`` is missing or empty,
        or when the upper-cased ``method`` is neither ``GET``/``POST`` nor a
        match for ``http_method_re``.
        """
        raw_url = data.get('url')
        if not raw_url:
            raise InterfaceValidationError("No value for 'url'")

        method = data.get('method')
        if method:
            method = method.upper()
            # GET and POST are by far the most common, so test those cheaply
            # before falling back to the regular expression.
            accepted = method in ('GET', 'POST') or http_method_re.match(method)
            if not accepted:
                raise InterfaceValidationError("Invalid value for 'method'")
        else:
            method = None

        scheme, netloc, path, url_query, url_fragment = urlsplit(raw_url)

        # An explicit query string takes precedence over the URL's own.
        query = data.get('query_string') or url_query
        if not query:
            query = ''
        else:
            if isinstance(query, dict):
                # Mappings are serialized into a single encoded string.
                encoded_pairs = [
                    (to_bytes(key), to_bytes(value))
                    for key, value in query.items()
                ]
                query = urlencode(encoded_pairs)
            elif query[0] == '?':
                # Drop the leading '?' separator.
                query = query[1:]
            query = trim(query, 4096)

        fragment = data.get('fragment') or url_fragment

        cookies = data.get('cookies')
        raw_headers = data.get('headers')
        if not raw_headers:
            headers = ()
        else:
            # Cookies reported alongside the headers are only a fallback for
            # payloads without explicit cookies.
            headers, header_cookies = format_headers(raw_headers)
            if header_cookies and not cookies:
                cookies = header_cookies

        # The body is preferably a string: it can then be tried as JSON or as
        # a URL-encoded query string without trusting the content type header.
        body = data.get('data')

        # First Content-Type header, if any, with its parameters removed.
        content_type = None
        for name, value in headers:
            if name == 'Content-Type':
                content_type = value
                break
        if content_type is not None:
            media_type, _, _ = content_type.partition(';')
            content_type = media_type.rstrip()

        # This runs once at ingestion and again when the interface is served
        # over the API; a stored inferred content type means the body was
        # already decoded and must be left alone.
        if 'inferred_content_type' in data:
            inferred = data['inferred_content_type']
        else:
            inferred = content_type
            if not isinstance(body, dict):
                body, inferred = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        # Query and fragment are separate fields, so the stored URL is
        # rebuilt without them.
        return cls(
            method=method,
            query_string=query,
            inferred_content_type=inferred,
            cookies=trim_pairs(format_cookies(cookies)),
            env=trim_dict(data.get('env') or {}),
            headers=trim_pairs(headers),
            data=fix_broken_encoding(body),
            url=urlunsplit((scheme, netloc, path, '', '')),
            fragment=trim(fragment, 1024),
        )
Esempio n. 6
0
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        """
        Build an instance of this interface from a raw ``data`` mapping.

        When ``rust_renormalized`` is truthy, missing keys are filled in on
        ``data`` in place (``query_string`` defaults to ``[]``, every other
        known key to ``None``) and ``cls`` is constructed straight from
        ``data`` with no further processing.

        Otherwise ``data`` is checked with
        ``validate_and_default_interface(data, cls.path)`` and the method,
        URL, query string, fragment, cookies, headers, body and environment
        are normalized before being passed to ``cls``.

        Raises ``InterfaceValidationError`` when validation fails, or when the
        upper-cased ``method`` is neither ``GET``/``POST`` nor a match for
        ``http_method_re``.
        """
        if rust_renormalized:
            data.setdefault('query_string', [])
            for key in (
                "method",
                "url",
                "fragment",
                "cookies",
                "headers",
                "data",
                "env",
                "inferred_content_type",
            ):
                data.setdefault(key, None)
            return cls(**data)

        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET', 'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            url = to_unicode(data['url'])
            # The JavaScript SDK used to send an ellipsis character for
            # truncated URLs. Canonical URLs do not contain UTF-8 characters in
            # either the path, query string or fragment, so we replace it with
            # three dots (which is the behavior of other SDKs). This effectively
            # makes the string two characters longer, but it will be trimmed
            # again down below.
            if url.endswith(u"\u2026"):
                url = url[:-1] + "..."
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        # An explicit query string wins over the one embedded in the URL. It
        # is normalized to a list of (key, value) pairs whatever its input
        # shape.
        query_string = data.get('query_string') or query_bit
        if query_string:
            if isinstance(query_string, six.string_types):
                if query_string[0] == '?':
                    query_string = query_string[1:]
                # Same ellipsis replacement as for the URL above.
                if query_string.endswith(u"\u2026"):
                    query_string = query_string[:-1] + "..."
                query_string = [
                    (to_unicode(k), jsonify(v))
                    for k, v in parse_qsl(query_string, keep_blank_values=True)
                ]
            elif isinstance(query_string, dict):
                query_string = [(to_unicode(k), jsonify(v)) for k, v in six.iteritems(query_string)]
            elif isinstance(query_string, list):
                # Keep only well-formed two-item entries.
                query_string = [
                    tuple(tup) for tup in query_string
                    if isinstance(tup, (tuple, list)) and len(tup) == 2
                ]
            else:
                query_string = []
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = []

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        if data.get("headers"):
            headers, cookie_header = format_headers(get_path(data, "headers", filter=True))
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'), None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid decoding the
        # body a second time when an inferred content type is already stored.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        # 'env' can be present with a None value (the rust_renormalized branch
        # above stores exactly that). `data.get('env', {})` would return that
        # None and the membership test below would raise TypeError, so fall
        # back to an empty dict for both the missing and the null case.
        env = data.get('env') or {}
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                # Drop an address that does not validate instead of rejecting
                # the whole interface.
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        # Query and fragment are stored separately, so the URL is rebuilt
        # without them.
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)