Beispiel #1
0
    def test_normalize_errno_fail(self):
        # With an SDK name of 'invalid', the errno meta is expected to come
        # back exactly as it went in (no 'name' key added).
        errno_meta = {'number': 2}
        mechanism = {'type': 'generic', 'meta': {'errno': errno_meta}}
        sdk_info = {'sdk_name': 'invalid'}

        normalize_mechanism_meta(mechanism, sdk_info)

        expected = {'number': 2}
        assert mechanism['meta']['errno'] == expected
Beispiel #2
0
    def test_normalize_partial_signal(self):
        # Only the signal number is supplied (no 'code'); for the 'linux' SDK
        # the test expects just the signal 'name' to be filled in.
        signal_meta = {'number': 11}
        mechanism = {'type': 'generic', 'meta': {'signal': signal_meta}}
        sdk_info = {'sdk_name': 'linux'}

        normalize_mechanism_meta(mechanism, sdk_info)

        expected = {'number': 11, 'name': 'SIGSEGV'}
        assert mechanism['meta']['signal'] == expected
Beispiel #3
0
    def test_normalize_errno(self):
        # For the 'linux' SDK, errno number 2 is expected to gain the
        # name 'ENOENT'.
        errno_meta = {'number': 2}
        mechanism = {'type': 'generic', 'meta': {'errno': errno_meta}}
        sdk_info = {'sdk_name': 'linux'}

        normalize_mechanism_meta(mechanism, sdk_info)

        expected = {'number': 2, 'name': 'ENOENT'}
        assert mechanism['meta']['errno'] == expected
Beispiel #4
0
    def test_normalize_signal_fail(self):
        # With an SDK name of 'invalid', the signal meta is expected to stay
        # as supplied: neither 'name' nor 'code_name' is added.
        signal_meta = {'number': 11, 'code': 0}
        mechanism = {'type': 'generic', 'meta': {'signal': signal_meta}}
        sdk_info = {'sdk_name': 'invalid'}

        normalize_mechanism_meta(mechanism, sdk_info)

        expected = {'number': 11, 'code': 0}
        assert mechanism['meta']['signal'] == expected
Beispiel #5
0
    def test_normalize_errno_override(self):
        # An errno 'name' already present in the payload is expected to be
        # kept as-is for the 'linux' SDK.
        errno_meta = {'number': 2, 'name': 'OVERRIDDEN'}
        mechanism = {'type': 'generic', 'meta': {'errno': errno_meta}}
        sdk_info = {'sdk_name': 'linux'}

        normalize_mechanism_meta(mechanism, sdk_info)

        # Deliberately a fresh literal so the comparison does not alias the
        # input dict.
        expected = {'number': 2, 'name': 'OVERRIDDEN'}
        assert mechanism['meta']['errno'] == expected
Beispiel #6
0
    def test_normalize_signal(self):
        # For the 'macos' SDK, signal 11 with code 0 is expected to gain both
        # a 'name' and a 'code_name'.
        signal_meta = {'number': 11, 'code': 0}
        mechanism = {'type': 'generic', 'meta': {'signal': signal_meta}}
        sdk_info = {'sdk_name': 'macos'}

        normalize_mechanism_meta(mechanism, sdk_info)

        expected = {
            'number': 11,
            'code': 0,
            'name': 'SIGSEGV',
            'code_name': 'SEGV_NOOP',
        }
        assert mechanism['meta']['signal'] == expected
Beispiel #7
0
    def test_normalize_signal_override(self):
        # 'name' and 'code_name' values already present in the payload are
        # expected to be kept as-is for the 'macos' SDK.
        signal_meta = {
            'number': 11,
            'code': 0,
            'name': 'OVERRIDDEN',
            'code_name': 'OVERRIDDEN',
        }
        mechanism = {'type': 'generic', 'meta': {'signal': signal_meta}}
        sdk_info = {'sdk_name': 'macos'}

        normalize_mechanism_meta(mechanism, sdk_info)

        # Deliberately a fresh literal so the comparison does not alias the
        # input dict.
        expected = {
            'number': 11,
            'code': 0,
            'name': 'OVERRIDDEN',
            'code_name': 'OVERRIDDEN',
        }
        assert mechanism['meta']['signal'] == expected
Beispiel #8
0
    def test_normalize_mach_fail(self):
        # Exception number 99 is expected to leave the mach_exception meta
        # exactly as supplied.
        mach_meta = {'exception': 99, 'subcode': 8, 'code': 1}
        mechanism = {'type': 'generic', 'meta': {'mach_exception': mach_meta}}

        # We do not need SDK information here because mach exceptions only
        # occur on Darwin
        normalize_mechanism_meta(mechanism, None)

        expected = {'exception': 99, 'subcode': 8, 'code': 1}
        assert mechanism['meta']['mach_exception'] == expected
Beispiel #9
0
 def test_normalize_missing(self):
     # A mechanism without a 'meta' key is expected to pass through
     # unchanged, even when no SDK info is given.
     mechanism = {'type': 'generic'}

     normalize_mechanism_meta(mechanism, None)

     expected = {'type': 'generic'}
     assert mechanism == expected
Beispiel #10
0
    def normalize(self, request_env=None):
        """Normalize ``self.data`` in place and return it.

        The steps run in this order: cast loosely-typed top-level values,
        fold a raw ``message`` into ``logentry``, resolve ``{{auto}}`` IP
        placeholders, validate the event body and tags against their schemas,
        validate every non-reserved key as an interface, apply defaults,
        normalize exception mechanism meta, backfill the user IP address and
        trim oversized values.

        Values that fail a cast or interface validation are removed from the
        event and an entry describing them is appended to ``data['errors']``.

        :param request_env: optional mapping; the keys read here are
            ``client_ip`` and ``auth``. ``None`` is treated as empty.
        :returns: the same ``self.data`` object, normalized.
        """
        request_env = request_env or {}
        data = self.data
        errors = data['errors'] = []

        # Ignore event meta data for now.
        data.pop('_meta', None)

        # Before validating with a schema, attempt to cast values to their desired types
        # so that the schema doesn't have to take every type variation into account.
        text = six.text_type
        fp_types = six.string_types + six.integer_types + (float, )

        def to_values(v):
            # Wrap a non-empty list/tuple as {'values': [...]}; anything else
            # is passed through untouched.
            return {'values': v} if v and isinstance(v, (tuple, list)) else v

        def stringify(f):
            # Floats are rendered through int(); a float whose magnitude is
            # 2**53 or more yields None, which the fingerprint cast filters out.
            if isinstance(f, float):
                return text(int(f)) if abs(f) < (1 << 53) else None
            return text(f)

        casts = {
            'environment': lambda v: text(v) if v is not None else v,
            'fingerprint': lambda v: list(x for x in map(stringify, v) if x is not None) if isinstance(v, list) and all(isinstance(f, fp_types) for f in v) else v,
            'release': lambda v: text(v) if v is not None else v,
            'dist': lambda v: text(v).strip() if v is not None else v,
            'time_spent': lambda v: int(v) if v is not None else v,
            'tags': lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip()) for (v_k, v_v) in dict(v).items()],
            'timestamp': lambda v: process_timestamp(v),
            'platform': lambda v: v if v in VALID_PLATFORMS else 'other',
            'logentry': lambda v: v if isinstance(v, dict) else {'message': v},

            # These can be sent as lists and need to be converted to {'values': [...]}
            'exception': to_values,
            'breadcrumbs': to_values,
            'threads': to_values,
        }

        for c in casts:
            if c in data:
                try:
                    data[c] = casts[c](data[c])
                except InvalidTimestamp as it:
                    # The first exception argument is used as the error type.
                    errors.append({'type': it.args[0], 'name': c, 'value': data[c]})
                    del data[c]
                except Exception:
                    # Any other cast failure: record it and drop the value.
                    errors.append({'type': EventError.INVALID_DATA, 'name': c, 'value': data[c]})
                    del data[c]

        # raw 'message' is coerced to the Message interface, as its used for pure index of
        # searchable strings. If both a raw 'message' and a Message interface exist, try and
        # add the former as the 'formatted' attribute of the latter.
        # See GH-3248
        msg_str = data.pop('message', None)
        if msg_str:
            msg_if = data.get('logentry')
            # NOTE(review): '_meta' was popped at the top of this method, so
            # msg_meta looks like it is always None here - confirm.
            msg_meta = data.get('_meta', {}).get('message')

            if not msg_if:
                msg_if = data['logentry'] = {'message': msg_str}
                if msg_meta:
                    data.setdefault('_meta', {}).setdefault('logentry', {})['message'] = msg_meta

            if msg_if.get('message') != msg_str:
                if not msg_if.get('formatted'):
                    msg_if['formatted'] = msg_str
                    if msg_meta:
                        data.setdefault('_meta', {}).setdefault(
                            'logentry', {})['formatted'] = msg_meta

        # Fill in ip addresses marked as {{auto}}
        client_ip = request_env.get('client_ip')
        if client_ip:
            if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['sentry.interfaces.Http']['env']['REMOTE_ADDR'] = client_ip

            if get_path(data, ['request', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['request']['env']['REMOTE_ADDR'] = client_ip

            if get_path(data, ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
                data['sentry.interfaces.User']['ip_address'] = client_ip

            if get_path(data, ['user', 'ip_address']) == '{{auto}}':
                data['user']['ip_address'] = client_ip

        # Validate main event body and tags against schema.
        # XXX(ja): jsonschema does not like CanonicalKeyDict, so we need to pass
        #          in the inner data dict.
        is_valid, event_errors = validate_and_default_interface(data.data, 'event')
        errors.extend(event_errors)
        if 'tags' in data:
            is_valid, tag_errors = validate_and_default_interface(data['tags'], 'tags', name='tags')
            errors.extend(tag_errors)

        # Validate interfaces
        # Iterate over a snapshot of the keys because entries are popped and
        # re-inserted while looping.
        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.logger.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.logger.debug('Ignored unknown attribute: %s', k)
                errors.append({'type': EventError.INVALID_ATTRIBUTE, 'name': k})
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                # Validation errors are logged at debug level, anything else
                # at error level; either way the value stays removed.
                log = self.logger.debug if isinstance(
                    e, InterfaceValidationError) else self.logger.error
                log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
                errors.append({'type': EventError.INVALID_DATA, 'name': k, 'value': value})

        # Additional data coercion and defaulting
        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
        data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

        # A dist without a release is discarded.
        if data.get('dist') and not data.get('release'):
            data['dist'] = None

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        # TODO (alex) can this all be replaced by utcnow?
        # it looks like the only time that this would even be hit is when timestamp
        # is not defined, as the earlier process_timestamp already converts existing
        # timestamps to floats.
        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            # NOTE(review): '%s' is not among the strftime codes documented by
            # Python and depends on the platform C library - confirm that all
            # target platforms support it.
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        data.setdefault('checksum', None)
        data.setdefault('culprit', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('fingerprint', None)
        data.setdefault('logger', DEFAULT_LOGGER_NAME)
        data.setdefault('platform', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('tags', [])
        data.setdefault('transaction', None)

        # Fix case where legacy apps pass 'environment' as a tag
        # instead of a top level key.
        # TODO (alex) save() just reinserts the environment into the tags
        if not data.get('environment'):
            tagsdict = dict(data['tags'])
            if 'environment' in tagsdict:
                data['environment'] = tagsdict['environment']
                del tagsdict['environment']
                # list() keeps tags a real list on Python 3, where
                # dict.items() returns a view rather than a list.
                data['tags'] = list(tagsdict.items())

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key
        data['version'] = self.version

        # A lone top-level stacktrace is moved onto the single exception value.
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        # Exception mechanism needs SDK information to resolve proper names in
        # exception meta (such as signal names). "SDK Information" really means
        # the operating system version the event was generated on. Some
        # normalization still works without sdk_info, such as mach_exception
        # names (they can only occur on macOS).
        if exception:
            sdk_info = get_sdk_from_event(data)
            for ex in exception['values']:
                if 'mechanism' in ex:
                    normalize_mechanism_meta(ex['mechanism'], sdk_info)

        # If there is no User ip_address, update it either from the Http interface
        # or the client_ip of the request.
        auth = request_env.get('auth')
        is_public = auth and auth.is_public
        add_ip_platforms = ('javascript', 'cocoa', 'objc')

        http_ip = data.get('sentry.interfaces.Http', {}).get('env', {}).get('REMOTE_ADDR')
        if http_ip:
            data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', http_ip)
        elif client_ip and (is_public or data.get('platform') in add_ip_platforms):
            data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', client_ip)

        # Trim values
        data['logger'] = trim(data['logger'].strip(), 64)
        trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        return data
Beispiel #11
0
    def normalize(self, request_env=None):
        """Normalize ``self.data`` in place and return it.

        The steps run in this order: cast loosely-typed top-level values,
        fold a raw ``message`` into ``sentry.interfaces.Message``, resolve
        ``{{auto}}`` IP placeholders, validate the event body and tags against
        their schemas, validate every non-reserved key as an interface, apply
        defaults, normalize exception mechanism meta, backfill the user IP
        address and trim oversized values.

        Values that fail a cast or interface validation are removed from the
        event and an entry describing them is appended to ``data['errors']``.

        :param request_env: optional mapping; the keys read here are
            ``client_ip`` and ``auth``. ``None`` is treated as empty.
        :returns: the same ``self.data`` object, normalized.
        """
        request_env = request_env or {}
        data = self.data
        errors = data['errors'] = []

        # Before validating with a schema, attempt to cast values to their desired types
        # so that the schema doesn't have to take every type variation into account.
        text = six.text_type
        fp_types = six.string_types + six.integer_types + (float, )

        def to_values(v):
            # Wrap a non-empty list/tuple as {'values': [...]}; anything else
            # is passed through untouched.
            return {'values': v} if v and isinstance(v, (tuple, list)) else v

        def convert_fingerprint(values):
            # Stringify every item of a copy of the list. A metric is emitted
            # when any float is non-integral or has magnitude >= 2**53.
            rv = values[:]
            bad_float = False
            for idx, item in enumerate(rv):
                if isinstance(item, float) and \
                   (abs(item) >= (1 << 53) or int(item) != item):
                    bad_float = True
                rv[idx] = text(item)
            if bad_float:
                metrics.incr(
                    'events.bad_float_fingerprint',
                    skip_internal=True,
                    tags={
                        'project_id': data.get('project'),
                    },
                )
            return rv

        casts = {
            'environment': lambda v: text(v) if v is not None else v,
            'fingerprint': lambda v: (
                convert_fingerprint(v)
                if isinstance(v, list) and all(isinstance(f, fp_types) for f in v)
                else v
            ),
            'release': lambda v: text(v) if v is not None else v,
            'dist': lambda v: text(v).strip() if v is not None else v,
            'time_spent': lambda v: int(v) if v is not None else v,
            'tags': lambda v: [
                (text(v_k).replace(' ', '-').strip(), text(v_v).strip())
                for (v_k, v_v) in dict(v).items()
            ],
            'timestamp': lambda v: process_timestamp(v),
            'platform': lambda v: v if v in VALID_PLATFORMS else 'other',
            'sentry.interfaces.Message': lambda v: (
                v if isinstance(v, dict) else {'message': v}
            ),

            # These can be sent as lists and need to be converted to {'values': [...]}
            'exception': to_values,
            'sentry.interfaces.Exception': to_values,
            'breadcrumbs': to_values,
            'sentry.interfaces.Breadcrumbs': to_values,
            'threads': to_values,
            'sentry.interfaces.Threads': to_values,
        }

        for c in casts:
            if c in data:
                try:
                    data[c] = casts[c](data[c])
                except InvalidTimestamp as it:
                    # The first exception argument is used as the error type.
                    errors.append({
                        'type': it.args[0],
                        'name': c,
                        'value': data[c]
                    })
                    del data[c]
                except Exception:
                    # Any other cast failure: record it and drop the value.
                    errors.append({
                        'type': EventError.INVALID_DATA,
                        'name': c,
                        'value': data[c]
                    })
                    del data[c]

        # raw 'message' is coerced to the Message interface, as its used for pure index of
        # searchable strings. If both a raw 'message' and a Message interface exist, try and
        # add the former as the 'formatted' attribute of the latter.
        # See GH-3248
        msg_str = data.pop('message', None)
        if msg_str:
            msg_if = data.setdefault('sentry.interfaces.Message',
                                     {'message': msg_str})
            if msg_if.get('message') != msg_str:
                msg_if.setdefault('formatted', msg_str)

        # Fill in ip addresses marked as {{auto}}
        client_ip = request_env.get('client_ip')
        if client_ip:
            if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['sentry.interfaces.Http']['env']['REMOTE_ADDR'] = client_ip

            if get_path(data, ['request', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['request']['env']['REMOTE_ADDR'] = client_ip

            if get_path(data, ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
                data['sentry.interfaces.User']['ip_address'] = client_ip

            if get_path(data, ['user', 'ip_address']) == '{{auto}}':
                data['user']['ip_address'] = client_ip

        # Validate main event body and tags against schema
        is_valid, event_errors = validate_and_default_interface(data, 'event')
        errors.extend(event_errors)
        if 'tags' in data:
            is_valid, tag_errors = validate_and_default_interface(
                data['tags'], 'tags', name='tags')
            errors.extend(tag_errors)

        # Validate interfaces
        # Iterate over a snapshot of the keys because entries are popped and
        # re-inserted while looping.
        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.logger.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.logger.debug('Ignored unknown attribute: %s', k)
                errors.append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k
                })
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                # Validation errors are logged at debug level, anything else
                # at error level; either way the value stays removed.
                log = self.logger.debug if isinstance(
                    e, InterfaceValidationError) else self.logger.error
                log('Discarded invalid value for interface: %s (%r)',
                    k,
                    value,
                    exc_info=True)
                errors.append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value
                })

        # Additional data coercion and defaulting
        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, int) or (isinstance(level, six.string_types)
                                      and level.isdigit()):
            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
        data['level'] = LOG_LEVELS_MAP.get(level,
                                           LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

        # A dist without a release is discarded.
        if data.get('dist') and not data.get('release'):
            data['dist'] = None

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        # TODO (alex) can this all be replaced by utcnow?
        # it looks like the only time that this would even be hit is when timestamp
        # is not defined, as the earlier process_timestamp already converts existing
        # timestamps to floats.
        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            # NOTE(review): '%s' is not among the strftime codes documented by
            # Python and depends on the platform C library - confirm that all
            # target platforms support it.
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        data.setdefault('checksum', None)
        data.setdefault('culprit', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('fingerprint', None)
        data.setdefault('logger', DEFAULT_LOGGER_NAME)
        data.setdefault('platform', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('tags', [])
        data.setdefault('transaction', None)

        # Fix case where legacy apps pass 'environment' as a tag
        # instead of a top level key.
        # TODO (alex) save() just reinserts the environment into the tags
        if not data.get('environment'):
            tagsdict = dict(data['tags'])
            if 'environment' in tagsdict:
                data['environment'] = tagsdict['environment']
                del tagsdict['environment']
                # list() keeps tags a real list on Python 3, where
                # dict.items() returns a view rather than a list.
                data['tags'] = list(tagsdict.items())

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key
        data['version'] = self.version

        # A lone top-level stacktrace is moved onto the single exception value.
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        # Exception mechanism needs SDK information to resolve proper names in
        # exception meta (such as signal names). "SDK Information" really means
        # the operating system version the event was generated on. Some
        # normalization still works without sdk_info, such as mach_exception
        # names (they can only occur on macOS).
        if exception:
            sdk_info = get_sdk_from_event(data)
            for ex in exception['values']:
                if 'mechanism' in ex:
                    normalize_mechanism_meta(ex['mechanism'], sdk_info)

        # If there is no User ip_address, update it either from the Http interface
        # or the client_ip of the request.
        auth = request_env.get('auth')
        is_public = auth and auth.is_public
        add_ip_platforms = ('javascript', 'cocoa', 'objc')

        http_ip = data.get('sentry.interfaces.Http',
                           {}).get('env', {}).get('REMOTE_ADDR')
        if http_ip:
            data.setdefault('sentry.interfaces.User',
                            {}).setdefault('ip_address', http_ip)
        elif client_ip and (is_public
                            or data.get('platform') in add_ip_platforms):
            data.setdefault('sentry.interfaces.User',
                            {}).setdefault('ip_address', client_ip)

        # Trim values
        data['logger'] = trim(data['logger'].strip(), 64)
        trim_dict(data['extra'],
                  max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        return data