Example #1
    def to_python(cls, data):
        data = data.copy()

        extra_data = data.pop("data", data)
        if not isinstance(extra_data, dict):
            extra_data = {}

        ident = trim(data.pop("id", None), 128)
        if ident:
            ident = unicode(ident)
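        # NOTE: unicode() here is the Python 2 builtin; the later revisions in
        # Examples #2-#5 use six.text_type for the same coercion.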
        try:
            email = trim(validate_email(data.pop("email", None), False), 128)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'email'")
        username = trim(data.pop("username", None), 128)
        if username:
            username = unicode(username)

        try:
            ip_address = validate_ip(data.pop("ip_address", None), False)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'ip_address'")

        # TODO(dcramer): patch in fix to deal w/ old data but not allow new
        # if not (ident or email or username or ip_address):
        #     raise ValueError('No identifying value')

        kwargs = {"id": ident, "email": email, "username": username, "ip_address": ip_address}

        kwargs["data"] = trim_dict(extra_data)
        return cls(**kwargs)
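To make the pattern in these user examples concrete, here is a small self-contained sketch (plain Python 3, not Sentry's actual classes): the known identity fields are popped, coerced to bounded strings, and whatever remains is swept into an extra-data dict. The _trim helper and the function name are stand-ins for illustration only.

def _trim(value, max_len):
    # Hypothetical stand-in for Sentry's trim(): truncate long strings,
    # pass other values through unchanged.
    if isinstance(value, str) and len(value) > max_len:
        return value[:max_len]
    return value

def user_payload_to_kwargs(data):
    data = dict(data)  # work on a copy, as data.copy() does above
    ident = data.pop("id", None)
    if ident is not None:
        ident = _trim(str(ident), 128)
    email = _trim(data.pop("email", None), 128)
    username = data.pop("username", None)
    if username is not None:
        username = _trim(str(username), 128)
    extra = data.pop("data", None)
    if not isinstance(extra, dict):
        extra = {}
    extra.update(data)  # leftover keys become extra data
    return {"id": ident, "email": email, "username": username, "data": extra}

print(user_payload_to_kwargs({"id": 42, "email": "a@example.com", "plan": "pro"}))
# {'id': '42', 'email': 'a@example.com', 'username': None, 'data': {'plan': 'pro'}}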
Example #2
File: user.py Project: zouyang2015/sentry
    def to_python(cls, data):
        data = data.copy()

        ident = data.pop('id', None)
        if ident is not None:
            ident = trim(six.text_type(ident), 128)

        try:
            email = trim(validate_email(data.pop('email', None), False),
                         MAX_EMAIL_FIELD_LENGTH)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'email'")

        username = data.pop('username', None)
        if username is not None:
            username = trim(six.text_type(username), 128)

        name = data.pop('name', None)
        if name is not None:
            name = trim(six.text_type(name), 128)

        try:
            ip_address = validate_ip(data.pop('ip_address', None), False)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'ip_address'")

        geo = data.pop('geo', None)
        if not geo and ip_address:
            geo = Geo.from_ip_address(ip_address)
        elif geo:
            geo = Geo.to_python(geo)

        extra_data = data.pop('data', None)
        if not isinstance(extra_data, dict):
            extra_data = {}
        extra_data.update(data)

        # TODO(dcramer): patch in fix to deal w/ old data but not allow new
        # if not (ident or email or username or ip_address):
        #     raise ValueError('No identifying value')

        kwargs = {
            'id': ident,
            'email': email,
            'username': username,
            'ip_address': ip_address,
            'name': name,
            'geo': geo,
            'data': trim_dict(extra_data)
        }

        return cls(**kwargs)
Example #3
File: user.py Project: Kayle009/sentry
    def to_python(cls, data):
        data = data.copy()

        ident = data.pop('id', None)
        if ident is not None:
            ident = trim(six.text_type(ident), 128)

        try:
            email = trim(validate_email(data.pop('email', None), False), MAX_EMAIL_FIELD_LENGTH)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'email'")

        username = data.pop('username', None)
        if username is not None:
            username = trim(six.text_type(username), 128)

        name = data.pop('name', None)
        if name is not None:
            name = trim(six.text_type(name), 128)

        try:
            ip_address = validate_ip(data.pop('ip_address', None), False)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'ip_address'")

        geo = data.pop('geo', None)
        if not geo and ip_address:
            geo = Geo.from_ip_address(ip_address)
        elif geo:
            geo = Geo.to_python(geo)

        extra_data = data.pop('data', None)
        if not isinstance(extra_data, dict):
            extra_data = {}
        extra_data.update(data)

        # TODO(dcramer): patch in fix to deal w/ old data but not allow new
        # if not (ident or email or username or ip_address):
        #     raise ValueError('No identifying value')

        kwargs = {
            'id': ident,
            'email': email,
            'username': username,
            'ip_address': ip_address,
            'name': name,
            'geo': geo,
            'data': trim_dict(extra_data)
        }

        return cls(**kwargs)
Example #4
    def to_python(cls, data):
        data = data.copy()

        extra_data = data.pop('data', data)
        if not isinstance(extra_data, dict):
            extra_data = {}

        ident = trim(data.pop('id', None), 128)
        if ident:
            ident = six.text_type(ident)
        try:
            email = trim(validate_email(data.pop('email', None), False),
                         MAX_EMAIL_FIELD_LENGTH)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'email'")

        username = trim(data.pop('username', None), 128)
        if username:
            username = six.text_type(username)

        name = trim(data.pop('name', None), 128)
        if name:
            name = six.text_type(name)

        try:
            ip_address = validate_ip(data.pop('ip_address', None), False)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'ip_address'")

        # TODO(dcramer): patch in fix to deal w/ old data but not allow new
        # if not (ident or email or username or ip_address):
        #     raise ValueError('No identifying value')

        kwargs = {
            'id': ident,
            'email': email,
            'username': username,
            'ip_address': ip_address,
            'name': name,
        }

        kwargs['data'] = trim_dict(extra_data)
        return cls(**kwargs)
Example #5
File: user.py Project: duanshuaimin/sentry
    def to_python(cls, data):
        data = data.copy()

        extra_data = data.pop('data', data)
        if not isinstance(extra_data, dict):
            extra_data = {}

        ident = trim(data.pop('id', None), 128)
        if ident:
            ident = six.text_type(ident)
        try:
            email = trim(validate_email(data.pop('email', None), False), MAX_EMAIL_FIELD_LENGTH)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'email'")

        username = trim(data.pop('username', None), 128)
        if username:
            username = six.text_type(username)

        name = trim(data.pop('name', None), 128)
        if name:
            name = six.text_type(name)

        try:
            ip_address = validate_ip(data.pop('ip_address', None), False)
        except ValueError:
            raise InterfaceValidationError("Invalid value for 'ip_address'")

        # TODO(dcramer): patch in fix to deal w/ old data but not allow new
        # if not (ident or email or username or ip_address):
        #     raise ValueError('No identifying value')

        kwargs = {
            'id': ident,
            'email': email,
            'username': username,
            'ip_address': ip_address,
            'name': name,
        }

        kwargs['data'] = trim_dict(extra_data)
        return cls(**kwargs)
Example #6
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get('level'), (six.string_types, int)):
            data['level'] = logging.ERROR
        elif data['level'] not in LOG_LEVELS:
            data['level'] = logging.ERROR

        if not data.get('logger'):
            data['logger'] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data['logger'].strip(), 64)
            if TagKey.is_valid_key(logger):
                data['logger'] = logger
            else:
                data['logger'] = DEFAULT_LOGGER_NAME

        if data.get('platform'):
            data['platform'] = trim(data['platform'], 64)

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp

        if not data.get('event_id'):
            data['event_id'] = uuid4().hex

        data.setdefault('message', '')
        data.setdefault('culprit', None)
        data.setdefault('time_spent', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('errors', [])

        tags = data.get('tags')
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = tags.items()
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data['tags'] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            data['tags'].append((key, value))

        if not isinstance(data['extra'], dict):
            # throw it away
            data['extra'] = {}

        trim_dict(
            data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of validate data needs stuffed into the manager
        for key in data.keys():
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass

        data['version'] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get(
                        'env', {}).get('REMOTE_ADDR'),
                    required=False,
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault('sentry.interfaces.User', {})
                data['sentry.interfaces.User'].setdefault(
                    'ip_address', ip_address)

        if data['time_spent']:
            data['time_spent'] = int(data['time_spent'])

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['message']:
            data['message'] = trim(
                data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)

        return data
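The tags branch of normalize() accepts either a mapping or a list of two-item pairs and silently discards anything else. A standalone sketch of just that branch, using plain str() instead of six.text_type (the function name is illustrative):

def normalize_tags(tags):
    # Accept a dict ({"env": "prod"}) or a list of (key, value) pairs;
    # anything else, e.g. a flat ["a", "b", "c"] list, is dropped entirely.
    if not tags:
        tags = []
    elif isinstance(tags, dict):
        tags = list(tags.items())
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)

    normalized = []
    for key, value in tags:
        key, value = str(key).strip(), str(value).strip()
        if key and value:  # pairs with an empty side are skipped
            normalized.append((key, value))
    return normalized

print(normalize_tags({"env": "prod", "release": " 1.0 "}))  # [('env', 'prod'), ('release', '1.0')]
print(normalize_tags(["not", "pairs", "here"]))             # []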
Example #7
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get('level'), (six.string_types, int)):
            data['level'] = logging.ERROR
        elif data['level'] not in LOG_LEVELS:
            data['level'] = logging.ERROR

        if not data.get('logger'):
            data['logger'] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data['logger'].strip(), 64)
            if TagKey.is_valid_key(logger):
                data['logger'] = logger
            else:
                data['logger'] = DEFAULT_LOGGER_NAME

        if data.get('platform'):
            data['platform'] = trim(data['platform'], 64)

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp

        if not data.get('event_id'):
            data['event_id'] = uuid4().hex

        data.setdefault('message', '')
        data.setdefault('culprit', None)
        data.setdefault('time_spent', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('errors', [])

        tags = data.get('tags')
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = tags.items()
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data['tags'] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            data['tags'].append((key, value))

        if not isinstance(data['extra'], dict):
            # throw it away
            data['extra'] = {}

        trim_dict(data['extra'],
                  max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of validate data needs stuffed into the manager
        for key in data.keys():
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass

        data['version'] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get('env',
                                                       {}).get('REMOTE_ADDR'),
                    required=False,
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault('sentry.interfaces.User', {})
                data['sentry.interfaces.User'].setdefault(
                    'ip_address', ip_address)

        if data['time_spent']:
            data['time_spent'] = int(data['time_spent'])

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['message']:
            data['message'] = trim(data['message'],
                                   settings.SENTRY_MAX_MESSAGE_LENGTH)

        return data
Example #8
    def to_python(cls, data):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET',
                              'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])

        query_string = data.get('query_string') or query_bit
        if query_string:
            # if querystring was a dict, convert it to a string
            if isinstance(query_string, dict):
                query_string = urlencode([(to_bytes(k), to_bytes(v))
                                          for k, v in query_string.items()])
            else:
                if query_string[0] == '?':
                    # remove '?' prefix
                    query_string = query_string[1:]
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = ''

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        headers = data.get('headers')
        if headers:
            headers, cookie_header = format_headers(headers)
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'),
                            None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid overwriting
        # decoding the body again.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
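Two small normalizations from this to_python are easy to isolate: a dict query string is flattened with urlencode, and Content-Type parameters are stripped before the body is decoded. A sketch using only the standard library (Sentry's to_bytes helper is replaced here by urlencode's own coercion):

from urllib.parse import urlencode

query_string = {"q": "sentry", "page": 2}
if isinstance(query_string, dict):
    # dicts are flattened to a canonical query string
    query_string = urlencode(list(query_string.items()))
elif query_string.startswith("?"):
    query_string = query_string[1:]  # strip the '?' prefix

content_type = "application/json; charset=utf-8"
media_type = content_type.partition(";")[0].rstrip()

print(query_string)  # q=sentry&page=2
print(media_type)    # application/json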
Example #9
File: http.py Project: rubiruchi/sentry
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if rust_renormalized:
            data.setdefault('query_string', [])
            for key in (
                    "method",
                    "url",
                    "fragment",
                    "cookies",
                    "headers",
                    "data",
                    "env",
                    "inferred_content_type",
            ):
                data.setdefault(key, None)
            return cls(**data)

        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET',
                              'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            url = to_unicode(data['url'])
            # The JavaScript SDK used to send an ellipsis character for
            # truncated URLs. Canonical URLs do not contain UTF-8 characters in
            # either the path, query string or fragment, so we replace it with
            # three dots (which is the behavior of other SDKs). This effectively
            # makes the string two characters longer, but it will be trimmed
            # again down below.
            if url.endswith(u"\u2026"):
                url = url[:-1] + "..."
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        query_string = data.get('query_string') or query_bit
        if query_string:
            if isinstance(query_string, six.string_types):
                if query_string[0] == '?':
                    query_string = query_string[1:]
                if query_string.endswith(u"\u2026"):
                    query_string = query_string[:-1] + "..."
                query_string = [
                    (to_unicode(k), jsonify(v))
                    for k, v in parse_qsl(query_string, keep_blank_values=True)
                ]
            elif isinstance(query_string, dict):
                query_string = [(to_unicode(k), jsonify(v))
                                for k, v in six.iteritems(query_string)]
            elif isinstance(query_string, list):
                query_string = [
                    tuple(tup) for tup in query_string
                    if isinstance(tup, (tuple, list)) and len(tup) == 2
                ]
            else:
                query_string = []
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = []

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        if data.get("headers"):
            headers, cookie_header = format_headers(
                get_path(data, "headers", filter=True))
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'),
                            None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid overwriting
        # decoding the body again.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
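This revision canonicalizes every query-string form (raw string, dict, list of pairs) into a list of key/value tuples rather than a single string. A minimal standard-library sketch of the string branch, with no dependency on Sentry's to_unicode or jsonify helpers:

from urllib.parse import parse_qsl

def query_string_to_pairs(query_string):
    # Mirror of the string branch above: drop a leading '?', repair the
    # ellipsis some SDKs use to mark truncation, then split into pairs.
    if query_string.startswith("?"):
        query_string = query_string[1:]
    if query_string.endswith("\u2026"):
        query_string = query_string[:-1] + "..."
    return parse_qsl(query_string, keep_blank_values=True)

print(query_string_to_pairs("?foo=bar&empty=&foo=baz"))
# [('foo', 'bar'), ('empty', ''), ('foo', 'baz')]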
Example #10
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get("level"), (six.string_types, int)):
            data["level"] = logging.ERROR
        elif data["level"] not in LOG_LEVELS:
            data["level"] = logging.ERROR

        if not data.get("logger"):
            data["logger"] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data["logger"].strip(), 64)
            if TagKey.is_valid_key(logger):
                data["logger"] = logger
            else:
                data["logger"] = DEFAULT_LOGGER_NAME

        if data.get("platform"):
            data["platform"] = trim(data["platform"], 64)

        current_timestamp = timezone.now()
        timestamp = data.get("timestamp")
        if not timestamp:
            timestamp = current_timestamp

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime("%s"))

        data["timestamp"] = timestamp
        data["received"] = float(timezone.now().strftime("%s"))

        if not data.get("event_id"):
            data["event_id"] = uuid4().hex

        data.setdefault("culprit", None)
        data.setdefault("server_name", None)
        data.setdefault("site", None)
        data.setdefault("checksum", None)
        data.setdefault("fingerprint", None)
        data.setdefault("platform", None)
        data.setdefault("environment", None)
        data.setdefault("extra", {})
        data.setdefault("errors", [])

        tags = data.get("tags")
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = tags.items()
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data["tags"] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            data["tags"].append((key, value))

            # XXX(dcramer): many legacy apps are using the environment tag
            # rather than the key itself
            if key == "environment" and not data.get("environment"):
                data["environment"] = value

        if not isinstance(data["extra"], dict):
            # throw it away
            data["extra"] = {}

        trim_dict(data["extra"], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of validate data needs stuffed into the manager
        for key in data.keys():
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                # XXX: we should consider logging this.
                pass

        # TODO(dcramer): this logic is duplicated in ``validate_data`` from
        # coreapi

        # message is coerced to an interface, as its used for pure
        # index of searchable strings
        # See GH-3248
        message = data.pop("message", None)
        if message:
            if "sentry.interfaces.Message" not in data:
                interface = get_interface("sentry.interfaces.Message")
                try:
                    inst = interface.to_python({"message": message})
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass
            elif not data["sentry.interfaces.Message"].get("formatted"):
                interface = get_interface("sentry.interfaces.Message")
                try:
                    inst = interface.to_python(dict(data["sentry.interfaces.Message"], formatted=message))
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data["type"] = eventtypes.infer(data).key

        data["version"] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get("sentry.interfaces.Exception")
        stacktrace = data.get("sentry.interfaces.Stacktrace")
        if exception and len(exception["values"]) == 1 and stacktrace:
            exception["values"][0]["stacktrace"] = stacktrace
            del data["sentry.interfaces.Stacktrace"]

        if "sentry.interfaces.Http" in data:
            try:
                ip_address = validate_ip(
                    data["sentry.interfaces.Http"].get("env", {}).get("REMOTE_ADDR"), required=False
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault("sentry.interfaces.User", {})
                data["sentry.interfaces.User"].setdefault("ip_address", ip_address)

        if data["culprit"]:
            data["culprit"] = trim(data["culprit"], MAX_CULPRIT_LENGTH)

        return data
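The timestamp branch above converts a datetime to epoch seconds, treating naive datetimes as UTC; float(timestamp.strftime("%s")) is a platform-dependent shortcut for that conversion. A hedged standard-library equivalent (the function name is illustrative):

import calendar
from datetime import datetime, timezone

def to_epoch_seconds(timestamp):
    # Naive datetimes are assumed to be UTC, mirroring the
    # replace(tzinfo=timezone.utc) step above; aware ones are converted.
    if timestamp.tzinfo is None:
        timestamp = timestamp.replace(tzinfo=timezone.utc)
    return float(calendar.timegm(timestamp.utctimetuple()))

print(to_epoch_seconds(datetime(2015, 6, 1, 12, 0, 0)))  # 1433160000.0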
Example #11
File: http.py Project: getsentry/sentry
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if rust_renormalized:
            data.setdefault('query_string', [])
            for key in (
                "method",
                "url",
                "fragment",
                "cookies",
                "headers",
                "data",
                "env",
                "inferred_content_type",
            ):
                data.setdefault(key, None)
            return cls(**data)

        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET', 'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            url = to_unicode(data['url'])
            # The JavaScript SDK used to send an ellipsis character for
            # truncated URLs. Canonical URLs do not contain UTF-8 characters in
            # either the path, query string or fragment, so we replace it with
            # three dots (which is the behavior of other SDKs). This effectively
            # makes the string two characters longer, but it will be trimmed
            # again down below.
            if url.endswith(u"\u2026"):
                url = url[:-1] + "..."
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        query_string = data.get('query_string') or query_bit
        if query_string:
            if isinstance(query_string, six.string_types):
                if query_string[0] == '?':
                    query_string = query_string[1:]
                if query_string.endswith(u"\u2026"):
                    query_string = query_string[:-1] + "..."
                query_string = [
                    (to_unicode(k), jsonify(v))
                    for k, v in parse_qsl(query_string, keep_blank_values=True)
                ]
            elif isinstance(query_string, dict):
                query_string = [(to_unicode(k), jsonify(v)) for k, v in six.iteritems(query_string)]
            elif isinstance(query_string, list):
                query_string = [
                    tuple(tup) for tup in query_string
                    if isinstance(tup, (tuple, list)) and len(tup) == 2
                ]
            else:
                query_string = []
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = []

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        if data.get("headers"):
            headers, cookie_header = format_headers(get_path(data, "headers", filter=True))
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'), None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid overwriting
        # decoding the body again.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
Example #12
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if rust_renormalized:
            data = data.copy()
            for key in (
                    'id',
                    'email',
                    'username',
                    'ip_address',
                    'name',
                    'geo',
                    'data',
            ):
                data.setdefault(key, None)
            if data['geo'] is not None:
                data['geo'] = Geo.to_python(data['geo'])
            return cls(**data)

        data = data.copy()

        ident = data.pop('id', None)
        if ident is not None:
            ident = trim(six.text_type(ident), 128)

        email = data.pop('email', None)
        if not isinstance(email, six.string_types):
            email = None
        email = trim(email, MAX_EMAIL_FIELD_LENGTH)

        username = data.pop('username', None)
        if username is not None:
            username = trim(six.text_type(username), 128)

        name = data.pop('name', None)
        if name is not None:
            name = trim(six.text_type(name), 128)

        try:
            ip_address = validate_ip(data.pop('ip_address', None), False)
        except ValueError:
            ip_address = None

        geo = data.pop('geo', None)
        if not geo and ip_address:
            geo = Geo.from_ip_address(ip_address)
        elif geo:
            geo = Geo.to_python(geo)

        extra_data = data.pop('data', None)
        if not isinstance(extra_data, dict):
            extra_data = {}
        extra_data.update(data)

        # TODO(dcramer): patch in fix to deal w/ old data but not allow new
        # if not (ident or email or username or ip_address):
        #     raise ValueError('No identifying value')

        kwargs = {
            'id': ident,
            'email': email,
            'username': username,
            'ip_address': ip_address,
            'name': name,
            'geo': geo,
            'data': trim_dict(extra_data)
        }

        return cls(**kwargs)
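Note how this revision never raises for bad user data: a non-string email becomes None and an invalid ip_address is silently dropped rather than failing the whole event. A tiny sketch of that defensive stance, using the standard ipaddress module as a hypothetical stand-in for Sentry's validate_ip:

import ipaddress

def lenient_ip(value):
    # Counterpart of the try/except around validate_ip above: a missing or
    # malformed address degrades to None instead of rejecting the event.
    if not value:
        return None
    try:
        return str(ipaddress.ip_address(value))
    except ValueError:
        return None

print(lenient_ip("127.0.0.1"))  # 127.0.0.1
print(lenient_ip("not-an-ip"))  # None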
Example #13
File: http.py Project: Kayle009/sentry
    def to_python(cls, data):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expression test
            if method not in ('GET', 'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        query_string = data.get('query_string') or query_bit
        if query_string:
            # if querystring was a dict, convert it to a string
            if isinstance(query_string, dict):
                query_string = urlencode(
                    [(to_bytes(k), to_bytes(v)) for k, v in query_string.items()]
                )
            else:
                if query_string[0] == '?':
                    # remove '?' prefix
                    query_string = query_string[1:]
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = ''

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        headers = data.get('headers')
        if headers:
            headers, cookie_header = format_headers(headers)
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'), None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid overwriting
        # decoding the body again.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
Example #14
    def normalize(self):
        data = self.data
        errors = data.get('errors', [])

        # Before validating with a schema, attempt to cast values to their desired types
        # so that the schema doesn't have to take every type variation into account.
        text = six.text_type
        fp_types = six.string_types + six.integer_types + (float, )

        def to_values(v):
            return {'values': v} if v and isinstance(v, (tuple, list)) else v

        casts = {
            'environment':
            lambda v: text(v) if v is not None else v,
            'fingerprint':
            lambda v: list(map(text, v))
            if isinstance(v, list) and all(isinstance(f, fp_types)
                                           for f in v) else v,
            'release':
            lambda v: text(v) if v is not None else v,
            'dist':
            lambda v: text(v).strip() if v is not None else v,
            'time_spent':
            lambda v: int(v) if v is not None else v,
            'tags':
            lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip())
                       for (v_k, v_v) in dict(v).items()],
            'timestamp':
            lambda v: process_timestamp(v),
            'platform':
            lambda v: v if v in VALID_PLATFORMS else 'other',

            # These can be sent as lists and need to be converted to {'values': [...]}
            'exception':
            to_values,
            'sentry.interfaces.Exception':
            to_values,
            'breadcrumbs':
            to_values,
            'sentry.interfaces.Breadcrumbs':
            to_values,
            'threads':
            to_values,
            'sentry.interfaces.Threads':
            to_values,
        }

        for c in casts:
            if c in data:
                try:
                    data[c] = casts[c](data[c])
                except Exception as e:
                    errors.append({
                        'type': EventError.INVALID_DATA,
                        'name': c,
                        'value': data[c]
                    })
                    del data[c]

        # raw 'message' is coerced to the Message interface, as its used for pure index of
        # searchable strings. If both a raw 'message' and a Message interface exist, try and
        # add the former as the 'formatted' attribute of the latter.
        # See GH-3248
        msg_str = data.pop('message', None)
        if msg_str:
            msg_if = data.setdefault('sentry.interfaces.Message',
                                     {'message': msg_str})
            if msg_if.get('message') != msg_str:
                msg_if.setdefault('formatted', msg_str)

        # Validate main event body and tags against schema
        is_valid, event_errors = validate_and_default_interface(data, 'event')
        errors.extend(event_errors)

        if 'tags' in data:
            is_valid, tag_errors = validate_and_default_interface(data['tags'],
                                                                  'tags',
                                                                  name='tags')
            errors.extend(tag_errors)

        # Validate interfaces
        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.logger.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.logger.debug('Ignored unknown attribute: %s', k)
                errors.append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k
                })
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                log = self.logger.debug if isinstance(
                    e, InterfaceValidationError) else self.logger.error
                log('Discarded invalid value for interface: %s (%r)',
                    k,
                    value,
                    exc_info=True)
                errors.append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value
                })

        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, int) or (isinstance(level, six.string_types)
                                      and level.isdigit()):
            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
        data['level'] = LOG_LEVELS_MAP.get(level,
                                           LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

        if data.get('dist') and not data.get('release'):
            data['dist'] = None

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        # TODO (alex) can this all be replaced by utcnow?
        # it looks like the only time that this would even be hit is when timestamp
        # is not defined, as the earlier process_timestamp already converts existing
        # timestamps to floats.
        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        data.setdefault('culprit', None)
        data.setdefault('transaction', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('tags', [])

        # Fix case where legacy apps pass 'environment' as a tag
        # instead of a top level key.
        # TODO (alex) save() just reinserts the environment into the tags
        if not data.get('environment'):
            tagsdict = dict(data['tags'])
            if 'environment' in tagsdict:
                data['environment'] = tagsdict['environment']
                del tagsdict['environment']
                data['tags'] = tagsdict.items()

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key
        data['version'] = self.version

        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get('env',
                                                       {}).get('REMOTE_ADDR'),
                    required=False,
                )
                if ip_address:
                    data.setdefault('sentry.interfaces.User',
                                    {}).setdefault('ip_address', ip_address)
            except ValueError:
                pass

        # Trim values
        logger = data.get('logger', DEFAULT_LOGGER_NAME)
        data['logger'] = trim(logger.strip(), 64)

        trim_dict(data['extra'],
                  max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        data['errors'] = errors

        return data
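The casts table in this normalize() is a reusable pattern: a mapping from field name to a coercion callable, applied before schema validation, with failures recorded as error entries instead of raised. A compact self-contained sketch of the same idea (the field names and error shape here are illustrative, not Sentry's exact EventError constants):

def apply_casts(data, casts):
    # Apply each coercion in place; on failure, record the problem and drop
    # the offending field so later validation never sees the bad value.
    errors = []
    for field, cast in casts.items():
        if field not in data:
            continue
        try:
            data[field] = cast(data[field])
        except Exception:
            errors.append({"type": "invalid_data", "name": field, "value": data.pop(field)})
    return errors

casts = {
    "environment": lambda v: str(v) if v is not None else v,
    "time_spent": lambda v: int(v) if v is not None else v,
    "tags": lambda v: [(str(k).strip(), str(val).strip()) for k, val in dict(v).items()],
}

event = {"environment": 1, "time_spent": "12x", "tags": {"env": "prod"}}
errors = apply_casts(event, casts)
print(event)   # {'environment': '1', 'tags': [('env', 'prod')]}
print(errors)  # [{'type': 'invalid_data', 'name': 'time_spent', 'value': '12x'}]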
Example #15
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get('level'), (six.string_types, int)):
            data['level'] = logging.ERROR
        elif data['level'] not in LOG_LEVELS:
            data['level'] = logging.ERROR

        if not data.get('logger'):
            data['logger'] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data['logger'].strip(), 64)
            if TagKey.is_valid_key(logger):
                data['logger'] = logger
            else:
                data['logger'] = DEFAULT_LOGGER_NAME

        if data.get('platform'):
            data['platform'] = trim(data['platform'], 64)

        current_timestamp = timezone.now()
        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = current_timestamp

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        if not data.get('event_id'):
            data['event_id'] = uuid4().hex

        data.setdefault('culprit', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('errors', [])

        tags = data.get('tags')
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = list(tags.items())
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data['tags'] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            # XXX(dcramer): many legacy apps are using the environment tag
            # rather than the key itself
            if key == 'environment' and not data.get('environment'):
                data['environment'] = value
            else:
                data['tags'].append((key, value))

        if not isinstance(data['extra'], dict):
            # throw it away
            data['extra'] = {}

        trim_dict(
            data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of validate data needs stuffed into the manager
        for key in list(iter(data)):
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                # XXX: we should consider logging this.
                pass

        # TODO(dcramer): this logic is duplicated in ``validate_data`` from
        # coreapi

        # message is coerced to an interface, as its used for pure
        # index of searchable strings
        # See GH-3248
        message = data.pop('message', None)
        if message:
            if 'sentry.interfaces.Message' not in data:
                interface = get_interface('sentry.interfaces.Message')
                try:
                    inst = interface.to_python({
                        'message': message,
                    })
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass
            elif not data['sentry.interfaces.Message'].get('formatted'):
                interface = get_interface('sentry.interfaces.Message')
                try:
                    inst = interface.to_python(dict(
                        data['sentry.interfaces.Message'],
                        formatted=message,
                    ))
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key

        data['version'] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get(
                        'env', {}).get('REMOTE_ADDR'),
                    required=False,
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault('sentry.interfaces.User', {})
                data['sentry.interfaces.User'].setdefault(
                    'ip_address', ip_address)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        return data
Example #16
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get('level'), (six.string_types, int)):
            data['level'] = logging.ERROR
        elif data['level'] not in LOG_LEVELS:
            data['level'] = logging.ERROR

        if not data.get('logger') or not isinstance(data.get('logger'), six.string_types):
            data['logger'] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data['logger'].strip(), 64)
            if tagstore.is_valid_key(logger):
                data['logger'] = logger
            else:
                data['logger'] = DEFAULT_LOGGER_NAME

        if data.get('platform'):
            data['platform'] = trim(data['platform'], 64)

        current_timestamp = timezone.now()
        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = current_timestamp

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
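            # NOTE: '%s' is a non-standard strftime directive (a glibc/BSD
            # extension) that renders the time as Unix epoch seconds; it is
            # not available on every platform.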
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        if not data.get('event_id'):
            data['event_id'] = uuid4().hex

        data.setdefault('culprit', None)
        data.setdefault('transaction', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('errors', [])

        tags = data.get('tags')
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = list(tags.items())
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data['tags'] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            # XXX(dcramer): many legacy apps are using the environment tag
            # rather than the key itself
            if key == 'environment' and not data.get('environment'):
                data['environment'] = value
            else:
                data['tags'].append((key, value))

        if not isinstance(data['extra'], dict):
            # throw it away
            data['extra'] = {}

        trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of ``validate_data`` needs to be stuffed into the manager
        for key in list(iter(data)):
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                # XXX: we should consider logging this.
                pass

        # TODO(dcramer): this logic is duplicated in ``validate_data`` from
        # coreapi

        # message is coerced to an interface, as it's used as a pure
        # index of searchable strings
        # See GH-3248
        message = data.pop('message', None)
        if message:
            if 'sentry.interfaces.Message' not in data:
                interface = get_interface('sentry.interfaces.Message')
                try:
                    inst = interface.to_python({
                        'message': message,
                    })
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass
            elif not data['sentry.interfaces.Message'].get('formatted'):
                interface = get_interface('sentry.interfaces.Message')
                try:
                    inst = interface.to_python(
                        dict(
                            data['sentry.interfaces.Message'],
                            formatted=message,
                        )
                    )
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key

        data['version'] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get('env', {}).get('REMOTE_ADDR'),
                    required=False,
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault('sentry.interfaces.User', {})
                data['sentry.interfaces.User'].setdefault('ip_address', ip_address)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        return data
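Before the interface coercion runs, normalize() above sanitizes tags: a dict is accepted and converted to key/value pairs, a sequence containing anything other than 2-item pairs is discarded wholesale, pairs with a blank key or value are skipped, and a legacy environment tag is promoted onto the event's environment field. The self-contained sketch below reproduces just that policy; normalize_tags is an illustrative stand-in (it uses str instead of six.text_type) and is not part of the Sentry codebase.

def normalize_tags(data):
    # Reproduce the tag-normalization rules from normalize() above.
    tags = data.get('tags') or []
    if isinstance(tags, dict):
        # dict syntax is fully supported
        tags = list(tags.items())
    elif not all(len(t) == 2 for t in tags):
        # reject the invalid [tag, tag, tag] syntax wholesale
        tags = []
    else:
        tags = list(tags)

    data['tags'] = []
    for key, value in tags:
        key, value = str(key).strip(), str(value).strip()
        if not (key and value):
            continue
        # legacy apps send the environment as a tag rather than a field
        if key == 'environment' and not data.get('environment'):
            data['environment'] = value
        else:
            data['tags'].append((key, value))
    return data

# The environment tag is promoted and the blank key is dropped:
print(normalize_tags({'tags': {'environment': 'prod', 'browser': 'Firefox', '': 'x'}}))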
Example #17
File: user.py  Project: getsentry/sentry
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if rust_renormalized:
            data = data.copy()
            for key in (
                'id',
                'email',
                'username',
                'ip_address',
                'name',
                'geo',
                'data',
            ):
                data.setdefault(key, None)
            if data['geo'] is not None:
                data['geo'] = Geo.to_python(data['geo'])
            return cls(**data)

        data = data.copy()

        ident = data.pop('id', None)
        if ident is not None:
            ident = trim(six.text_type(ident), 128)

        email = data.pop('email', None)
        if not isinstance(email, six.string_types):
            email = None
        email = trim(email, MAX_EMAIL_FIELD_LENGTH)

        username = data.pop('username', None)
        if username is not None:
            username = trim(six.text_type(username), 128)

        name = data.pop('name', None)
        if name is not None:
            name = trim(six.text_type(name), 128)

        try:
            ip_address = validate_ip(data.pop('ip_address', None), False)
        except ValueError:
            ip_address = None

        geo = data.pop('geo', None)
        if not geo and ip_address:
            geo = Geo.from_ip_address(ip_address)
        elif geo:
            geo = Geo.to_python(geo)

        extra_data = data.pop('data', None)
        if not isinstance(extra_data, dict):
            extra_data = {}
        extra_data.update(data)

        # TODO(dcramer): patch in fix to deal w/ old data but not allow new
        # if not (ident or email or username or ip_address):
        #     raise ValueError('No identifying value')

        kwargs = {
            'id': ident,
            'email': email,
            'username': username,
            'ip_address': ip_address,
            'name': name,
            'geo': geo,
            'data': trim_dict(extra_data)
        }

        return cls(**kwargs)
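In this revision, the non-Rust path silently coerces bad identity fields rather than rejecting the payload: a non-string email and an unparsable ip_address both end up as None, and any leftover keys are folded into data. The stand-alone sketch below illustrates that drop-instead-of-reject policy with simplified stand-ins: _trim and the ipaddress-based check only approximate Sentry's trim/validate_ip helpers, and the 75-character email cap is a placeholder rather than Sentry's actual MAX_EMAIL_FIELD_LENGTH.

import ipaddress

def _trim(value, max_len):
    # Simplified stand-in for Sentry's trim(): plain truncation.
    if value is None:
        return None
    return value if len(value) <= max_len else value[:max_len]

def sanitize_user(payload, max_email=75):
    out = {}

    ident = payload.get('id')
    out['id'] = _trim(str(ident), 128) if ident is not None else None

    email = payload.get('email')
    out['email'] = _trim(email, max_email) if isinstance(email, str) else None

    ip = payload.get('ip_address')
    try:
        out['ip_address'] = str(ipaddress.ip_address(ip)) if ip else None
    except ValueError:
        # A bad address is dropped here rather than rejected with an error.
        out['ip_address'] = None
    return out

print(sanitize_user({'id': 42, 'email': 'jane@example.com', 'ip_address': 'nope'}))
# {'id': '42', 'email': 'jane@example.com', 'ip_address': None}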