Exemple #1
0
    def to_python(cls, data, slim_frames=True, raw=False):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid stack frame data.")

        frame_list = [
            # XXX(dcramer): handle PHP sending an empty array for a frame
            Frame.to_python(f or {}, raw=raw) for f in data['frames']
        ]

        kwargs = {
            'frames': frame_list,
        }

        kwargs['registers'] = None
        if data.get('registers') and isinstance(data['registers'], dict):
            kwargs['registers'] = data.get('registers')

        if data.get('frames_omitted'):
            kwargs['frames_omitted'] = data['frames_omitted']
        else:
            kwargs['frames_omitted'] = None

        instance = cls(**kwargs)
        if slim_frames:
            slim_frame_data(instance)
        return instance
Exemple #2
0
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        # TODO(markus): semaphore does not validate security interfaces yet
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        return cls(**data)
Exemple #3
0
    def to_python(cls, data, slim_frames=True, raw=False):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid stack frame data.")

        # Trim down the frame list to a hard limit. Leave the last frame in place in case
        # it's useful for debugging.
        frameiter = data.get('frames') or []
        if len(frameiter) > settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT:
            frameiter = chain(
                islice(data['frames'], settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT - 1), (data['frames'][-1],))

        frame_list = []

        for f in frameiter:
            if f is None:
                continue
            # XXX(dcramer): handle PHP sending an empty array for a frame
            frame_list.append(Frame.to_python(f or {}, raw=raw))

        kwargs = {
            'frames': frame_list,
        }

        kwargs['registers'] = None
        if data.get('registers') and isinstance(data['registers'], dict):
            kwargs['registers'] = data.get('registers')

        kwargs['frames_omitted'] = data.get('frames_omitted') or None

        instance = cls(**kwargs)
        if slim_frames:
            slim_frame_data(instance)
        return instance
Exemple #4
0
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if rust_renormalized:
            for key in (
                'abs_path',
                'filename',
                'context_line',
                'lineno',
                'pre_context',
                'post_context',
            ):
                data.setdefault(key, None)
            return cls(**data)

        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid template")

        kwargs = {
            'abs_path': trim(data.get('abs_path', None), 256),
            'filename': trim(data.get('filename', None), 256),
            'context_line': trim(data.get('context_line', None), 256),
            'lineno': int(data['lineno']) if data.get('lineno', None) is not None else None,
            # TODO(dcramer): trim pre/post_context
            'pre_context': data.get('pre_context'),
            'post_context': data.get('post_context'),
        }
        return cls(**kwargs)
Exemple #5
0
    def to_python(cls, data, slim_frames=True, raw=False):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid stack frame data.")

        # Trim down the frame list to a hard limit. Leave the last frame in place in case
        # it's useful for debugging.
        frameiter = data.get('frames') or []
        if len(frameiter) > settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT:
            frameiter = chain(
                islice(data['frames'], settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT - 1), (data['frames'][-1],))

        frame_list = []

        for f in frameiter:
            if f is None:
                continue
            # XXX(dcramer): handle PHP sending an empty array for a frame
            frame_list.append(Frame.to_python(f or {}, raw=raw))

        kwargs = {
            'frames': frame_list,
        }

        kwargs['registers'] = None
        if data.get('registers') and isinstance(data['registers'], dict):
            kwargs['registers'] = data.get('registers')

        kwargs['frames_omitted'] = data.get('frames_omitted') or None

        instance = cls(**kwargs)
        if slim_frames:
            slim_frame_data(instance)
        return instance
Exemple #6
0
    def to_python(cls, data):
        # TODO(markus): semaphore does not validate security interfaces yet
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        return cls(**data)
Exemple #7
0
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        # TODO(markus): semaphore does not validate security interfaces yet
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        return cls(**data)
Exemple #8
0
    def to_python(cls, data, slim_frames=True, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if not rust_renormalized:
            is_valid, errors = validate_and_default_interface(data, cls.path)
            if not is_valid:
                raise InterfaceValidationError("Invalid exception")

            if not (data.get('type') or data.get('value')):
                raise InterfaceValidationError("No 'type' or 'value' present")

        if get_path(data, 'stacktrace', 'frames', filter=True):
            stacktrace = Stacktrace.to_python(
                data['stacktrace'],
                slim_frames=slim_frames,
                rust_renormalized=rust_renormalized
            )
        else:
            stacktrace = None

        if get_path(data, 'raw_stacktrace', 'frames', filter=True):
            raw_stacktrace = Stacktrace.to_python(
                data['raw_stacktrace'], slim_frames=slim_frames, raw=True,
                rust_renormalized=rust_renormalized
            )
        else:
            raw_stacktrace = None

        type = data.get('type')
        value = data.get('value')

        if not rust_renormalized:
            if isinstance(value, six.string_types):
                if type is None:
                    m = _type_value_re.match(value)
                    if m:
                        type = m.group(1)
                        value = m.group(2).strip()
            elif value is not None:
                value = json.dumps(value)

            value = trim(value, 4096)

        if data.get('mechanism'):
            mechanism = Mechanism.to_python(data['mechanism'],
                                            rust_renormalized=rust_renormalized)
        else:
            mechanism = None

        kwargs = {
            'type': trim(type, 128),
            'value': value,
            'module': trim(data.get('module'), 128),
            'mechanism': mechanism,
            'stacktrace': stacktrace,
            'thread_id': trim(data.get('thread_id'), 40),
            'raw_stacktrace': raw_stacktrace,
        }

        return cls(**kwargs)
Exemple #9
0
    def to_python(cls, data, slim_frames=True, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if not rust_renormalized:
            is_valid, errors = validate_and_default_interface(data, cls.path)
            if not is_valid:
                raise InterfaceValidationError("Invalid exception")

            if not (data.get('type') or data.get('value')):
                raise InterfaceValidationError("No 'type' or 'value' present")

        if get_path(data, 'stacktrace', 'frames', filter=True):
            stacktrace = Stacktrace.to_python(
                data['stacktrace'],
                slim_frames=slim_frames,
                rust_renormalized=rust_renormalized
            )
        else:
            stacktrace = None

        if get_path(data, 'raw_stacktrace', 'frames', filter=True):
            raw_stacktrace = Stacktrace.to_python(
                data['raw_stacktrace'], slim_frames=slim_frames, raw=True,
                rust_renormalized=rust_renormalized
            )
        else:
            raw_stacktrace = None

        type = data.get('type')
        value = data.get('value')

        if not rust_renormalized:
            if isinstance(value, six.string_types):
                if type is None:
                    m = _type_value_re.match(value)
                    if m:
                        type = m.group(1)
                        value = m.group(2).strip()
            elif value is not None:
                value = json.dumps(value)

            value = trim(value, 4096)

        if data.get('mechanism'):
            mechanism = Mechanism.to_python(data['mechanism'],
                                            rust_renormalized=rust_renormalized)
        else:
            mechanism = None

        kwargs = {
            'type': trim(type, 128),
            'value': value,
            'module': trim(data.get('module'), 128),
            'mechanism': mechanism,
            'stacktrace': stacktrace,
            'thread_id': trim(data.get('thread_id'), 40),
            'raw_stacktrace': raw_stacktrace,
        }

        return cls(**kwargs)
Exemple #10
0
    def to_python(cls, data):
        data = upgrade_legacy_mechanism(data)
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid mechanism")

        if not data.get('type'):
            raise InterfaceValidationError("No 'type' present")

        mechanism_meta = data.get('meta') or {}
        mach_exception = mechanism_meta.get('mach_exception')
        if mach_exception is not None:
            mach_exception = prune_empty_keys({
                'exception':
                mach_exception['exception'],
                'code':
                mach_exception['code'],
                'subcode':
                mach_exception['subcode'],
                'name':
                mach_exception.get('name'),
            })

        signal = mechanism_meta.get('signal')
        if signal is not None:
            signal = prune_empty_keys({
                'number': signal['number'],
                'code': signal.get('code'),
                'name': signal.get('name'),
                'code_name': signal.get('code_name'),
            })

        errno = mechanism_meta.get('errno')
        if errno is not None:
            errno = prune_empty_keys({
                'number': errno['number'],
                'name': errno.get('name'),
            })

        kwargs = {
            'type': trim(data['type'], 128),
            'synthetic': data.get('synthetic'),
            'description': trim(data.get('description'), 1024),
            'help_link': trim(data.get('help_link'), 1024),
            'handled': data.get('handled'),
            'data': trim(data.get('data'), 4096),
            'meta': {
                'errno': errno,
                'mach_exception': mach_exception,
                'signal': signal,
            },
        }

        return cls(**kwargs)
    def to_python(cls, data, slim_frames=True):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid exception")

        if not (data.get('type') or data.get('value')):
            raise InterfaceValidationError("No 'type' or 'value' present")

        if data.get('stacktrace') and data['stacktrace'].get('frames'):
            stacktrace = Stacktrace.to_python(
                data['stacktrace'],
                slim_frames=slim_frames,
            )
        else:
            stacktrace = None

        if data.get('raw_stacktrace') and data['raw_stacktrace'].get('frames'):
            raw_stacktrace = Stacktrace.to_python(data['raw_stacktrace'],
                                                  slim_frames=slim_frames,
                                                  raw=True)
        else:
            raw_stacktrace = None

        type = data.get('type')
        value = data.get('value')
        if isinstance(value, six.string_types):
            if type is None:
                m = _type_value_re.match(value)
                if m:
                    type = m.group(1)
                    value = m.group(2).strip()
        elif value is not None:
            value = json.dumps(value)

        value = trim(value, 4096)

        mechanism = data.get('mechanism')
        if mechanism is not None:
            mechanism = trim(data.get('mechanism'), 4096)
            mechanism.setdefault('type', 'generic')

        kwargs = {
            'type': trim(type, 128),
            'value': value,
            'module': trim(data.get('module'), 128),
            'mechanism': mechanism,
            'stacktrace': stacktrace,
            'thread_id': trim(data.get('thread_id'), 40),
            'raw_stacktrace': raw_stacktrace,
        }

        return cls(**kwargs)
Exemple #12
0
    def to_python(cls, data):
        data = upgrade_legacy_mechanism(data)
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid mechanism")

        if not data.get('type'):
            raise InterfaceValidationError("No 'type' present")

        meta = data.get('meta', {})
        mach_exception = meta.get('mach_exception')
        if mach_exception is not None:
            mach_exception = prune_empty_keys({
                'exception': mach_exception['exception'],
                'code': mach_exception['code'],
                'subcode': mach_exception['subcode'],
                'name': mach_exception.get('name'),
            })

        signal = meta.get('signal')
        if signal is not None:
            signal = prune_empty_keys({
                'number': signal['number'],
                'code': signal.get('code'),
                'name': signal.get('name'),
                'code_name': signal.get('code_name'),
            })

        errno = meta.get('errno')
        if errno is not None:
            errno = prune_empty_keys({
                'number': errno['number'],
                'name': errno.get('name'),
            })

        kwargs = {
            'type': trim(data['type'], 128),
            'description': trim(data.get('description'), 1024),
            'help_link': trim(data.get('help_link'), 1024),
            'handled': data.get('handled'),
            'data': trim(data.get('data'), 4096),
            'meta': {
                'errno': errno,
                'mach_exception': mach_exception,
                'signal': signal,
            },
        }

        return cls(**kwargs)
Exemple #13
0
    def to_python(cls, data):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid template")

        kwargs = {
            'abs_path': trim(data.get('abs_path', None), 256),
            'filename': trim(data.get('filename', None), 256),
            'context_line': trim(data.get('context_line', None), 256),
            'lineno': int(data['lineno']) if data.get('lineno', None) is not None else None,
            # TODO(dcramer): trim pre/post_context
            'pre_context': data.get('pre_context'),
            'post_context': data.get('post_context'),
        }
        return cls(**kwargs)
Exemple #14
0
    def to_python(cls, data):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid template")

        kwargs = {
            'abs_path': trim(data.get('abs_path', None), 256),
            'filename': trim(data.get('filename', None), 256),
            'context_line': trim(data.get('context_line', None), 256),
            'lineno': int(data['lineno']) if data.get('lineno', None) is not None else None,
            # TODO(dcramer): trim pre/post_context
            'pre_context': data.get('pre_context'),
            'post_context': data.get('post_context'),
        }
        return cls(**kwargs)
    def to_python(cls, data):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid device")

        data = data.copy()

        extra_data = data.pop('data', data)
        name = trim(data.pop('name'), 64)
        version = trim(data.pop('version'), 64)
        build = trim(data.pop('build', None), 64)

        kwargs = {
            'name': name,
            'version': version,
            'build': build,
            'data': trim_dict(extra_data),
        }
        return cls(**kwargs)
Exemple #16
0
    def to_python(cls, data, raw=False):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid stack frame data.")

        abs_path = data.get('abs_path')
        filename = data.get('filename')
        symbol = data.get('symbol')
        function = data.get('function')
        module = data.get('module')
        package = data.get('package')

        # For legacy reasons
        if function in ('?', ''):
            function = None

        # For consistency reasons
        if symbol in ('?', ''):
            symbol = None

        # Some of this processing should only be done for non raw frames
        if not raw:
            # absolute path takes priority over filename
            # (in the end both will get set)
            if not abs_path:
                abs_path = filename
                filename = None

            if not filename and abs_path:
                if is_url(abs_path):
                    urlparts = urlparse(abs_path)
                    if urlparts.path:
                        filename = urlparts.path
                    else:
                        filename = abs_path
                else:
                    filename = abs_path

        platform = data.get('platform')

        context_locals = data.get('vars') or {}
        if isinstance(context_locals, (list, tuple)):
            context_locals = dict(enumerate(context_locals))
        elif not isinstance(context_locals, dict):
            context_locals = {}
        context_locals = trim_dict(context_locals, object_hook=handle_nan)

        # extra data is used purely by internal systems,
        # so we dont trim it
        extra_data = data.get('data') or {}
        if isinstance(extra_data, (list, tuple)):
            extra_data = dict(enumerate(extra_data))

        # XXX: handle lines which were sent as 'null'
        context_line = trim(data.get('context_line'), 256)
        pre_context = data.get('pre_context', None)
        if isinstance(pre_context, list) and pre_context:
            pre_context = [c or '' for c in pre_context]
        else:
            pre_context = None

        post_context = data.get('post_context', None)
        if isinstance(post_context, list) and post_context:
            post_context = [c or '' for c in post_context]
        else:
            post_context = None

        if not context_line and (pre_context or post_context):
            context_line = ''

        in_app = validate_bool(data.get('in_app'), False)

        kwargs = {
            'abs_path': trim(abs_path, 2048),
            'filename': trim(filename, 256),
            'platform': platform,
            'module': trim(module, 256),
            'function': trim(function, 256),
            'package': package,
            'image_addr': to_hex_addr(data.get('image_addr')),
            'symbol': trim(symbol, 256),
            'symbol_addr': to_hex_addr(data.get('symbol_addr')),
            'instruction_addr': to_hex_addr(data.get('instruction_addr')),
            'trust': trim(data.get('trust'), 16),
            'in_app': in_app,
            'context_line': context_line,
            # TODO(dcramer): trim pre/post_context
            'pre_context': pre_context,
            'post_context': post_context,
            'vars': context_locals or None,
            'data': extra_data or None,
            'errors': data.get('errors'),
        }

        if data.get('lineno') is not None:
            lineno = int(data['lineno'])
            if lineno < 0:
                lineno = None
            kwargs['lineno'] = lineno
        else:
            kwargs['lineno'] = None

        if data.get('colno') is not None:
            colno = int(data['colno'])
            if colno < 0:
                colno = None
            kwargs['colno'] = colno
        else:
            kwargs['colno'] = None

        return cls(**kwargs)
Exemple #17
0
    def normalize(self, request_env=None):
        request_env = request_env or {}
        data = self.data
        errors = data['errors'] = []

        # Ignore event meta data for now.
        data.pop('_meta', None)

        # Before validating with a schema, attempt to cast values to their desired types
        # so that the schema doesn't have to take every type variation into account.
        text = six.text_type
        fp_types = six.string_types + six.integer_types + (float, )

        def to_values(v):
            return {'values': v} if v and isinstance(v, (tuple, list)) else v

        def stringify(f):
            if isinstance(f, float):
                return text(int(f)) if abs(f) < (1 << 53) else None
            return text(f)

        casts = {
            'environment': lambda v: text(v) if v is not None else v,
            'fingerprint': lambda v: list(x for x in map(stringify, v) if x is not None) if isinstance(v, list) and all(isinstance(f, fp_types) for f in v) else v,
            'release': lambda v: text(v) if v is not None else v,
            'dist': lambda v: text(v).strip() if v is not None else v,
            'time_spent': lambda v: int(v) if v is not None else v,
            'tags': lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip()) for (v_k, v_v) in dict(v).items()],
            'timestamp': lambda v: process_timestamp(v),
            'platform': lambda v: v if v in VALID_PLATFORMS else 'other',
            'logentry': lambda v: v if isinstance(v, dict) else {'message': v},

            # These can be sent as lists and need to be converted to {'values': [...]}
            'exception': to_values,
            'breadcrumbs': to_values,
            'threads': to_values,
        }

        for c in casts:
            if c in data:
                try:
                    data[c] = casts[c](data[c])
                except InvalidTimestamp as it:
                    errors.append({'type': it.args[0], 'name': c, 'value': data[c]})
                    del data[c]
                except Exception as e:
                    errors.append({'type': EventError.INVALID_DATA, 'name': c, 'value': data[c]})
                    del data[c]

        # raw 'message' is coerced to the Message interface, as its used for pure index of
        # searchable strings. If both a raw 'message' and a Message interface exist, try and
        # add the former as the 'formatted' attribute of the latter.
        # See GH-3248
        msg_str = data.pop('message', None)
        if msg_str:
            msg_if = data.get('logentry')
            msg_meta = data.get('_meta', {}).get('message')

            if not msg_if:
                msg_if = data['logentry'] = {'message': msg_str}
                if msg_meta:
                    data.setdefault('_meta', {}).setdefault('logentry', {})['message'] = msg_meta

            if msg_if.get('message') != msg_str:
                if not msg_if.get('formatted'):
                    msg_if['formatted'] = msg_str
                    if msg_meta:
                        data.setdefault('_meta', {}).setdefault(
                            'logentry', {})['formatted'] = msg_meta

        # Fill in ip addresses marked as {{auto}}
        client_ip = request_env.get('client_ip')
        if client_ip:
            if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['sentry.interfaces.Http']['env']['REMOTE_ADDR'] = client_ip

            if get_path(data, ['request', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['request']['env']['REMOTE_ADDR'] = client_ip

            if get_path(data, ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
                data['sentry.interfaces.User']['ip_address'] = client_ip

            if get_path(data, ['user', 'ip_address']) == '{{auto}}':
                data['user']['ip_address'] = client_ip

        # Validate main event body and tags against schema.
        # XXX(ja): jsonschema does not like CanonicalKeyDict, so we need to pass
        #          in the inner data dict.
        is_valid, event_errors = validate_and_default_interface(data.data, 'event')
        errors.extend(event_errors)
        if 'tags' in data:
            is_valid, tag_errors = validate_and_default_interface(data['tags'], 'tags', name='tags')
            errors.extend(tag_errors)

        # Validate interfaces
        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.logger.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.logger.debug('Ignored unknown attribute: %s', k)
                errors.append({'type': EventError.INVALID_ATTRIBUTE, 'name': k})
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                log = self.logger.debug if isinstance(
                    e, InterfaceValidationError) else self.logger.error
                log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
                errors.append({'type': EventError.INVALID_DATA, 'name': k, 'value': value})

        # Additional data coercion and defaulting
        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
        data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

        if data.get('dist') and not data.get('release'):
            data['dist'] = None

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        # TODO (alex) can this all be replaced by utcnow?
        # it looks like the only time that this would even be hit is when timestamp
        # is not defined, as the earlier process_timestamp already converts existing
        # timestamps to floats.
        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        data.setdefault('checksum', None)
        data.setdefault('culprit', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('fingerprint', None)
        data.setdefault('logger', DEFAULT_LOGGER_NAME)
        data.setdefault('platform', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('tags', [])
        data.setdefault('transaction', None)

        # Fix case where legacy apps pass 'environment' as a tag
        # instead of a top level key.
        # TODO (alex) save() just reinserts the environment into the tags
        if not data.get('environment'):
            tagsdict = dict(data['tags'])
            if 'environment' in tagsdict:
                data['environment'] = tagsdict['environment']
                del tagsdict['environment']
                data['tags'] = tagsdict.items()

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key
        data['version'] = self.version

        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        # Exception mechanism needs SDK information to resolve proper names in
        # exception meta (such as signal names). "SDK Information" really means
        # the operating system version the event was generated on. Some
        # normalization still works without sdk_info, such as mach_exception
        # names (they can only occur on macOS).
        if exception:
            sdk_info = get_sdk_from_event(data)
            for ex in exception['values']:
                if 'mechanism' in ex:
                    normalize_mechanism_meta(ex['mechanism'], sdk_info)

        # If there is no User ip_addres, update it either from the Http interface
        # or the client_ip of the request.
        auth = request_env.get('auth')
        is_public = auth and auth.is_public
        add_ip_platforms = ('javascript', 'cocoa', 'objc')

        http_ip = data.get('sentry.interfaces.Http', {}).get('env', {}).get('REMOTE_ADDR')
        if http_ip:
            data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', http_ip)
        elif client_ip and (is_public or data.get('platform') in add_ip_platforms):
            data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', client_ip)

        # Trim values
        data['logger'] = trim(data['logger'].strip(), 64)
        trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        return data
Exemple #18
0
    def normalize(self, request_env=None):
        request_env = request_env or {}
        data = self.data
        errors = data['errors'] = []

        # Before validating with a schema, attempt to cast values to their desired types
        # so that the schema doesn't have to take every type variation into account.
        text = six.text_type
        fp_types = six.string_types + six.integer_types + (float, )

        def to_values(v):
            return {'values': v} if v and isinstance(v, (tuple, list)) else v

        def convert_fingerprint(values):
            rv = values[:]
            bad_float = False
            for idx, item in enumerate(rv):
                if isinstance(item, float) and \
                   (abs(item) >= (1 << 53) or int(item) != item):
                    bad_float = True
                rv[idx] = text(item)
            if bad_float:
                metrics.incr(
                    'events.bad_float_fingerprint',
                    skip_internal=True,
                    tags={
                        'project_id': data.get('project'),
                    },
                )
            return rv

        casts = {
            'environment':
            lambda v: text(v) if v is not None else v,
            'fingerprint':
            lambda v: convert_fingerprint(v)
            if isinstance(v, list) and all(isinstance(f, fp_types)
                                           for f in v) else v,
            'release':
            lambda v: text(v) if v is not None else v,
            'dist':
            lambda v: text(v).strip() if v is not None else v,
            'time_spent':
            lambda v: int(v) if v is not None else v,
            'tags':
            lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip())
                       for (v_k, v_v) in dict(v).items()],
            'timestamp':
            lambda v: process_timestamp(v),
            'platform':
            lambda v: v if v in VALID_PLATFORMS else 'other',
            'sentry.interfaces.Message':
            lambda v: v if isinstance(v, dict) else {
                'message': v
            },

            # These can be sent as lists and need to be converted to {'values': [...]}
            'exception':
            to_values,
            'sentry.interfaces.Exception':
            to_values,
            'breadcrumbs':
            to_values,
            'sentry.interfaces.Breadcrumbs':
            to_values,
            'threads':
            to_values,
            'sentry.interfaces.Threads':
            to_values,
        }

        for c in casts:
            if c in data:
                try:
                    data[c] = casts[c](data[c])
                except InvalidTimestamp as it:
                    errors.append({
                        'type': it.args[0],
                        'name': c,
                        'value': data[c]
                    })
                    del data[c]
                except Exception as e:
                    errors.append({
                        'type': EventError.INVALID_DATA,
                        'name': c,
                        'value': data[c]
                    })
                    del data[c]

        # raw 'message' is coerced to the Message interface, as its used for pure index of
        # searchable strings. If both a raw 'message' and a Message interface exist, try and
        # add the former as the 'formatted' attribute of the latter.
        # See GH-3248
        msg_str = data.pop('message', None)
        if msg_str:
            msg_if = data.setdefault('sentry.interfaces.Message',
                                     {'message': msg_str})
            if msg_if.get('message') != msg_str:
                msg_if.setdefault('formatted', msg_str)

        # Fill in ip addresses marked as {{auto}}
        client_ip = request_env.get('client_ip')
        if client_ip:
            if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR'
                               ]) == '{{auto}}':
                data['sentry.interfaces.Http']['env'][
                    'REMOTE_ADDR'] = client_ip

            if get_path(data, ['request', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['request']['env']['REMOTE_ADDR'] = client_ip

            if get_path(
                    data,
                ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
                data['sentry.interfaces.User']['ip_address'] = client_ip

            if get_path(data, ['user', 'ip_address']) == '{{auto}}':
                data['user']['ip_address'] = client_ip

        # Validate main event body and tags against schema
        is_valid, event_errors = validate_and_default_interface(data, 'event')
        errors.extend(event_errors)
        if 'tags' in data:
            is_valid, tag_errors = validate_and_default_interface(data['tags'],
                                                                  'tags',
                                                                  name='tags')
            errors.extend(tag_errors)

        # Validate interfaces
        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.logger.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.logger.debug('Ignored unknown attribute: %s', k)
                errors.append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k
                })
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                log = self.logger.debug if isinstance(
                    e, InterfaceValidationError) else self.logger.error
                log('Discarded invalid value for interface: %s (%r)',
                    k,
                    value,
                    exc_info=True)
                errors.append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value
                })

        # Additional data coercion and defaulting
        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, int) or (isinstance(level, six.string_types)
                                      and level.isdigit()):
            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
        data['level'] = LOG_LEVELS_MAP.get(level,
                                           LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

        if data.get('dist') and not data.get('release'):
            data['dist'] = None

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        # TODO (alex) can this all be replaced by utcnow?
        # it looks like the only time that this would even be hit is when timestamp
        # is not defined, as the earlier process_timestamp already converts existing
        # timestamps to floats.
        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        data.setdefault('checksum', None)
        data.setdefault('culprit', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('fingerprint', None)
        data.setdefault('logger', DEFAULT_LOGGER_NAME)
        data.setdefault('platform', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('tags', [])
        data.setdefault('transaction', None)

        # Fix case where legacy apps pass 'environment' as a tag
        # instead of a top level key.
        # TODO (alex) save() just reinserts the environment into the tags
        if not data.get('environment'):
            tagsdict = dict(data['tags'])
            if 'environment' in tagsdict:
                data['environment'] = tagsdict['environment']
                del tagsdict['environment']
                data['tags'] = tagsdict.items()

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key
        data['version'] = self.version

        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        # Exception mechanism needs SDK information to resolve proper names in
        # exception meta (such as signal names). "SDK Information" really means
        # the operating system version the event was generated on. Some
        # normalization still works without sdk_info, such as mach_exception
        # names (they can only occur on macOS).
        if exception:
            sdk_info = get_sdk_from_event(data)
            for ex in exception['values']:
                if 'mechanism' in ex:
                    normalize_mechanism_meta(ex['mechanism'], sdk_info)

        # If there is no User ip_addres, update it either from the Http interface
        # or the client_ip of the request.
        auth = request_env.get('auth')
        is_public = auth and auth.is_public
        add_ip_platforms = ('javascript', 'cocoa', 'objc')

        http_ip = data.get('sentry.interfaces.Http',
                           {}).get('env', {}).get('REMOTE_ADDR')
        if http_ip:
            data.setdefault('sentry.interfaces.User',
                            {}).setdefault('ip_address', http_ip)
        elif client_ip and (is_public
                            or data.get('platform') in add_ip_platforms):
            data.setdefault('sentry.interfaces.User',
                            {}).setdefault('ip_address', client_ip)

        # Trim values
        data['logger'] = trim(data['logger'].strip(), 64)
        trim_dict(data['extra'],
                  max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        return data
Exemple #19
0
    def to_python(cls, data):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        return cls(**data)
    def to_python(cls, data):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expresion test
            if method not in ('GET',
                              'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])

        query_string = data.get('query_string') or query_bit
        if query_string:
            # if querystring was a dict, convert it to a string
            if isinstance(query_string, dict):
                query_string = urlencode([(to_bytes(k), to_bytes(v))
                                          for k, v in query_string.items()])
            else:
                if query_string[0] == '?':
                    # remove '?' prefix
                    query_string = query_string[1:]
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = ''

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        headers = data.get('headers')
        if headers:
            headers, cookie_header = format_headers(headers)
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'),
                            None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid overwriting
        # decoding the body again.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
Exemple #21
0
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if rust_renormalized:
            data.setdefault('query_string', [])
            for key in (
                "method",
                "url",
                "fragment",
                "cookies",
                "headers",
                "data",
                "env",
                "inferred_content_type",
            ):
                data.setdefault(key, None)
            return cls(**data)

        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expresion test
            if method not in ('GET', 'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            url = to_unicode(data['url'])
            # The JavaScript SDK used to send an ellipsis character for
            # truncated URLs. Canonical URLs do not contain UTF-8 characters in
            # either the path, query string or fragment, so we replace it with
            # three dots (which is the behavior of other SDKs). This effectively
            # makes the string two characters longer, but it will be trimmed
            # again down below.
            if url.endswith(u"\u2026"):
                url = url[:-1] + "..."
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        query_string = data.get('query_string') or query_bit
        if query_string:
            if isinstance(query_string, six.string_types):
                if query_string[0] == '?':
                    query_string = query_string[1:]
                if query_string.endswith(u"\u2026"):
                    query_string = query_string[:-1] + "..."
                query_string = [
                    (to_unicode(k), jsonify(v))
                    for k, v in parse_qsl(query_string, keep_blank_values=True)
                ]
            elif isinstance(query_string, dict):
                query_string = [(to_unicode(k), jsonify(v)) for k, v in six.iteritems(query_string)]
            elif isinstance(query_string, list):
                query_string = [
                    tuple(tup) for tup in query_string
                    if isinstance(tup, (tuple, list)) and len(tup) == 2
                ]
            else:
                query_string = []
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = []

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        if data.get("headers"):
            headers, cookie_header = format_headers(get_path(data, "headers", filter=True))
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'), None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid overwriting
        # decoding the body again.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
Exemple #22
0
    def to_python(cls, data, raw=False):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid stack frame data.")

        abs_path = data.get('abs_path')
        filename = data.get('filename')
        symbol = data.get('symbol')
        function = data.get('function')
        module = data.get('module')
        package = data.get('package')

        # For legacy reasons
        if function in ('?', ''):
            function = None

        # For consistency reasons
        if symbol in ('?', ''):
            symbol = None

        # Some of this processing should only be done for non raw frames
        if not raw:
            # absolute path takes priority over filename
            # (in the end both will get set)
            if not abs_path:
                abs_path = filename
                filename = None

            if not filename and abs_path:
                if is_url(abs_path):
                    urlparts = urlparse(abs_path)
                    if urlparts.path:
                        filename = urlparts.path
                    else:
                        filename = abs_path
                else:
                    filename = abs_path

        platform = data.get('platform')

        context_locals = data.get('vars') or {}
        if isinstance(context_locals, (list, tuple)):
            context_locals = dict(enumerate(context_locals))
        elif not isinstance(context_locals, dict):
            context_locals = {}
        context_locals = trim_dict(context_locals, object_hook=handle_nan)

        # extra data is used purely by internal systems,
        # so we dont trim it
        extra_data = data.get('data') or {}
        if isinstance(extra_data, (list, tuple)):
            extra_data = dict(enumerate(extra_data))

        # XXX: handle lines which were sent as 'null'
        context_line = trim(data.get('context_line'), 256)
        if context_line is not None:
            pre_context = data.get('pre_context', None)
            if pre_context:
                pre_context = [c or '' for c in pre_context]

            post_context = data.get('post_context', None)
            if post_context:
                post_context = [c or '' for c in post_context]
        else:
            pre_context, post_context = None, None

        in_app = validate_bool(data.get('in_app'), False)

        kwargs = {
            'abs_path': trim(abs_path, 2048),
            'filename': trim(filename, 256),
            'platform': platform,
            'module': trim(module, 256),
            'function': trim(function, 256),
            'package': package,
            'image_addr': to_hex_addr(data.get('image_addr')),
            'symbol': trim(symbol, 256),
            'symbol_addr': to_hex_addr(data.get('symbol_addr')),
            'instruction_addr': to_hex_addr(data.get('instruction_addr')),
            'trust': trim(data.get('trust'), 16),
            'in_app': in_app,
            'context_line': context_line,
            # TODO(dcramer): trim pre/post_context
            'pre_context': pre_context,
            'post_context': post_context,
            'vars': context_locals or None,
            'data': extra_data or None,
            'errors': data.get('errors'),
        }

        if data.get('lineno') is not None:
            lineno = int(data['lineno'])
            if lineno < 0:
                lineno = None
            kwargs['lineno'] = lineno
        else:
            kwargs['lineno'] = None

        if data.get('colno') is not None:
            kwargs['colno'] = int(data['colno'])
        else:
            kwargs['colno'] = None

        return cls(**kwargs)
Exemple #23
0
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if rust_renormalized:
            data.setdefault('query_string', [])
            for key in (
                    "method",
                    "url",
                    "fragment",
                    "cookies",
                    "headers",
                    "data",
                    "env",
                    "inferred_content_type",
            ):
                data.setdefault(key, None)
            return cls(**data)

        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expresion test
            if method not in ('GET',
                              'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            url = to_unicode(data['url'])
            # The JavaScript SDK used to send an ellipsis character for
            # truncated URLs. Canonical URLs do not contain UTF-8 characters in
            # either the path, query string or fragment, so we replace it with
            # three dots (which is the behavior of other SDKs). This effectively
            # makes the string two characters longer, but it will be trimmed
            # again down below.
            if url.endswith(u"\u2026"):
                url = url[:-1] + "..."
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        query_string = data.get('query_string') or query_bit
        if query_string:
            if isinstance(query_string, six.string_types):
                if query_string[0] == '?':
                    query_string = query_string[1:]
                if query_string.endswith(u"\u2026"):
                    query_string = query_string[:-1] + "..."
                query_string = [
                    (to_unicode(k), jsonify(v))
                    for k, v in parse_qsl(query_string, keep_blank_values=True)
                ]
            elif isinstance(query_string, dict):
                query_string = [(to_unicode(k), jsonify(v))
                                for k, v in six.iteritems(query_string)]
            elif isinstance(query_string, list):
                query_string = [
                    tuple(tup) for tup in query_string
                    if isinstance(tup, (tuple, list)) and len(tup) == 2
                ]
            else:
                query_string = []
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = []

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        if data.get("headers"):
            headers, cookie_header = format_headers(
                get_path(data, "headers", filter=True))
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'),
                            None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid overwriting
        # decoding the body again.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
Exemple #24
0
    def normalize(self):
        data = self.data
        errors = data.get('errors', [])

        # Before validating with a schema, attempt to cast values to their desired types
        # so that the schema doesn't have to take every type variation into account.
        text = six.text_type
        fp_types = six.string_types + six.integer_types + (float, )

        def to_values(v):
            return {'values': v} if v and isinstance(v, (tuple, list)) else v

        casts = {
            'environment':
            lambda v: text(v) if v is not None else v,
            'fingerprint':
            lambda v: list(map(text, v))
            if isinstance(v, list) and all(isinstance(f, fp_types)
                                           for f in v) else v,
            'release':
            lambda v: text(v) if v is not None else v,
            'dist':
            lambda v: text(v).strip() if v is not None else v,
            'time_spent':
            lambda v: int(v) if v is not None else v,
            'tags':
            lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip())
                       for (v_k, v_v) in dict(v).items()],
            'timestamp':
            lambda v: process_timestamp(v),
            'platform':
            lambda v: v if v in VALID_PLATFORMS else 'other',

            # These can be sent as lists and need to be converted to {'values': [...]}
            'exception':
            to_values,
            'sentry.interfaces.Exception':
            to_values,
            'breadcrumbs':
            to_values,
            'sentry.interfaces.Breadcrumbs':
            to_values,
            'threads':
            to_values,
            'sentry.interfaces.Threads':
            to_values,
        }

        for c in casts:
            if c in data:
                try:
                    data[c] = casts[c](data[c])
                except Exception as e:
                    errors.append({
                        'type': EventError.INVALID_DATA,
                        'name': c,
                        'value': data[c]
                    })
                    del data[c]

        # raw 'message' is coerced to the Message interface, as its used for pure index of
        # searchable strings. If both a raw 'message' and a Message interface exist, try and
        # add the former as the 'formatted' attribute of the latter.
        # See GH-3248
        msg_str = data.pop('message', None)
        if msg_str:
            msg_if = data.setdefault('sentry.interfaces.Message',
                                     {'message': msg_str})
            if msg_if.get('message') != msg_str:
                msg_if.setdefault('formatted', msg_str)

        # Validate main event body and tags against schema
        is_valid, event_errors = validate_and_default_interface(data, 'event')
        errors.extend(event_errors)

        if 'tags' in data:
            is_valid, tag_errors = validate_and_default_interface(data['tags'],
                                                                  'tags',
                                                                  name='tags')
            errors.extend(tag_errors)

        # Validate interfaces
        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.logger.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.logger.debug('Ignored unknown attribute: %s', k)
                errors.append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k
                })
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                log = self.logger.debug if isinstance(
                    e, InterfaceValidationError) else self.logger.error
                log('Discarded invalid value for interface: %s (%r)',
                    k,
                    value,
                    exc_info=True)
                errors.append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value
                })

        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, int) or (isinstance(level, six.string_types)
                                      and level.isdigit()):
            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
        data['level'] = LOG_LEVELS_MAP.get(level,
                                           LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

        if data.get('dist') and not data.get('release'):
            data['dist'] = None

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        # TODO (alex) can this all be replaced by utcnow?
        # it looks like the only time that this would even be hit is when timestamp
        # is not defined, as the earlier process_timestamp already converts existing
        # timestamps to floats.
        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        data.setdefault('culprit', None)
        data.setdefault('transaction', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('tags', [])

        # Fix case where legacy apps pass 'environment' as a tag
        # instead of a top level key.
        # TODO (alex) save() just reinserts the environment into the tags
        if not data.get('environment'):
            tagsdict = dict(data['tags'])
            if 'environment' in tagsdict:
                data['environment'] = tagsdict['environment']
                del tagsdict['environment']
                data['tags'] = tagsdict.items()

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key
        data['version'] = self.version

        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get('env',
                                                       {}).get('REMOTE_ADDR'),
                    required=False,
                )
                if ip_address:
                    data.setdefault('sentry.interfaces.User',
                                    {}).setdefault('ip_address', ip_address)
            except ValueError:
                pass

        # Trim values
        logger = data.get('logger', DEFAULT_LOGGER_NAME)
        data['logger'] = trim(logger.strip(), 64)

        trim_dict(data['extra'],
                  max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        data['errors'] = errors

        return data
Exemple #25
0
    def to_python(cls, data):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        kwargs = {}

        if data.get('method'):
            method = data['method'].upper()
            # Optimize for the common path here, where it's a GET/POST, falling
            # back to a regular expresion test
            if method not in ('GET', 'POST') and not http_method_re.match(method):
                raise InterfaceValidationError("Invalid value for 'method'")
            kwargs['method'] = method
        else:
            kwargs['method'] = None

        if data.get('url', None):
            scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])
        else:
            scheme = netloc = path = query_bit = fragment_bit = None

        query_string = data.get('query_string') or query_bit
        if query_string:
            # if querystring was a dict, convert it to a string
            if isinstance(query_string, dict):
                query_string = urlencode(
                    [(to_bytes(k), to_bytes(v)) for k, v in query_string.items()]
                )
            else:
                if query_string[0] == '?':
                    # remove '?' prefix
                    query_string = query_string[1:]
            kwargs['query_string'] = trim(query_string, 4096)
        else:
            kwargs['query_string'] = ''

        fragment = data.get('fragment') or fragment_bit

        cookies = data.get('cookies')
        # if cookies were [also] included in headers we
        # strip them out
        headers = data.get('headers')
        if headers:
            headers, cookie_header = format_headers(headers)
            if not cookies and cookie_header:
                cookies = cookie_header
        else:
            headers = ()

        # We prefer the body to be a string, since we can then attempt to parse it
        # as JSON OR decode it as a URL encoded query string, without relying on
        # the correct content type header being passed.
        body = data.get('data')

        content_type = next((v for k, v in headers if k == 'Content-Type'), None)

        # Remove content type parameters
        if content_type is not None:
            content_type = content_type.partition(';')[0].rstrip()

        # We process request data once during ingestion and again when
        # requesting the http interface over the API. Avoid overwriting
        # decoding the body again.
        inferred_content_type = data.get('inferred_content_type', content_type)

        if 'inferred_content_type' not in data and not isinstance(body, dict):
            body, inferred_content_type = heuristic_decode(body, content_type)

        if body:
            body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

        env = data.get('env', {})
        # TODO (alex) This could also be accomplished with schema (with formats)
        if 'REMOTE_ADDR' in env:
            try:
                validate_ip(env['REMOTE_ADDR'], required=False)
            except ValueError:
                del env['REMOTE_ADDR']

        kwargs['inferred_content_type'] = inferred_content_type
        kwargs['cookies'] = trim_pairs(format_cookies(cookies))
        kwargs['env'] = trim_dict(env)
        kwargs['headers'] = trim_pairs(headers)
        kwargs['data'] = fix_broken_encoding(body)
        kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
        kwargs['fragment'] = trim(fragment, 1024)

        return cls(**kwargs)
Exemple #26
0
    def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
        if rust_renormalized:
            for key in (
                'type',
                'synthetic',
                'description',
                'help_link',
                'handled',
                'data',
                'meta',
            ):
                data.setdefault(key, None)

            return cls(**data)

        data = upgrade_legacy_mechanism(data)
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid mechanism")

        if not data.get('type'):
            raise InterfaceValidationError("No 'type' present")

        mechanism_meta = data.get('meta') or {}
        mach_exception = mechanism_meta.get('mach_exception')
        if mach_exception is not None:
            mach_exception = prune_empty_keys({
                'exception': mach_exception['exception'],
                'code': mach_exception['code'],
                'subcode': mach_exception['subcode'],
                'name': mach_exception.get('name'),
            })

        signal = mechanism_meta.get('signal')
        if signal is not None:
            signal = prune_empty_keys({
                'number': signal['number'],
                'code': signal.get('code'),
                'name': signal.get('name'),
                'code_name': signal.get('code_name'),
            })

        errno = mechanism_meta.get('errno')
        if errno is not None:
            errno = prune_empty_keys({
                'number': errno['number'],
                'name': errno.get('name'),
            })

        kwargs = {
            'type': trim(data['type'], 128),
            'synthetic': data.get('synthetic'),
            'description': trim(data.get('description'), 1024),
            'help_link': trim(data.get('help_link'), 1024),
            'handled': data.get('handled'),
            'data': trim(data.get('data'), 4096),
            'meta': {
                'errno': errno,
                'mach_exception': mach_exception,
                'signal': signal,
            },
        }

        return cls(**kwargs)
Exemple #27
0
    def to_python(cls, data):
        is_valid, errors = validate_and_default_interface(data, cls.path)
        if not is_valid:
            raise InterfaceValidationError("Invalid interface data")

        return cls(**data)