Ejemplo n.º 1
0
def test_get_interface_allows_http():
    from sentry.interfaces.http import Http

    result = get_interface("request")
    assert result is Http
    result = get_interface("request")
    assert result is Http
Ejemplo n.º 2
0
    def post(self, request, project, helper, **kwargs):
        json_body = helper.safely_load_json_string(request.body)
        report_type = self.security_report_type(json_body)
        if report_type is None:
            raise APIError('Unrecognized security report type')
        interface = get_interface(report_type)

        try:
            instance = interface.from_raw(json_body)
        except jsonschema.ValidationError as e:
            raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

        # Do origin check based on the `document-uri` key as explained in `_dispatch`.
        origin = instance.get_origin()
        if not is_valid_origin(origin, project):
            if project:
                tsdb.incr(tsdb.models.project_total_received_cors, project.id)
            raise APIForbidden('Invalid origin')

        data = {
            'interface': interface.path,
            'report': instance,
            'release': request.GET.get('sentry_release'),
            'environment': request.GET.get('sentry_environment'),
        }

        response_or_event_id = self.process(
            request, project=project, helper=helper, data=data, **kwargs
        )
        if isinstance(response_or_event_id, HttpResponse):
            return response_or_event_id
        return HttpResponse(content_type='application/javascript', status=201)
Ejemplo n.º 3
0
    def post(self, request, project, helper, key, **kwargs):
        json_body = safely_load_json_string(request.body)
        report_type = self.security_report_type(json_body)
        if report_type is None:
            raise APIError('Unrecognized security report type')
        interface = get_interface(report_type)

        try:
            instance = interface.from_raw(json_body)
        except jsonschema.ValidationError as e:
            raise APIError('Invalid security report: %s' %
                           str(e).splitlines()[0])

        # Do origin check based on the `document-uri` key as explained in `_dispatch`.
        origin = instance.get_origin()
        if not is_valid_origin(origin, project):
            if project:
                track_outcome(project.organization_id, project.id, key.id,
                              'invalid', FilterStatKeys.CORS)
            raise APIForbidden('Invalid origin')

        data = {
            'interface': interface.path,
            'report': instance,
            'release': request.GET.get('sentry_release'),
            'environment': request.GET.get('sentry_environment'),
        }

        self.process(request,
                     project=project,
                     helper=helper,
                     data=data,
                     key=key,
                     **kwargs)
        return HttpResponse(content_type='application/javascript', status=201)
Ejemplo n.º 4
0
    def should_filter(self):
        '''
        returns (result: bool, reason: string or None)
        Result is True if an event should be filtered
        The reason for filtering is passed along as a string
        so that we can store it in metrics
        '''
        for name in SECURITY_REPORT_INTERFACES:
            if name in self._data:
                interface = get_interface(name)
                if interface.to_python(self._data[name]).should_filter(self._project):
                    return (True, FilterStatKeys.INVALID_CSP)

        if self._client_ip and not is_valid_ip(self.relay_config, self._client_ip):
            return (True, FilterStatKeys.IP_ADDRESS)

        release = self._data.get('release')
        if release and not is_valid_release(self.relay_config, release):
            return (True, FilterStatKeys.RELEASE_VERSION)

        error_message = get_path(self._data, 'logentry', 'formatted') \
            or get_path(self._data, 'logentry', 'message') \
            or ''
        if error_message and not is_valid_error_message(self.relay_config, error_message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

        for exc in get_path(self._data, 'exception', 'values', filter=True, default=[]):
            message = u': '.join(
                filter(None, map(exc.get, ['type', 'value']))
            )
            if message and not is_valid_error_message(self.relay_config, message):
                return (True, FilterStatKeys.ERROR_MESSAGE)

        return should_filter_event(self.relay_config, self._data)
Ejemplo n.º 5
0
    def post(self, request, project, helper, **kwargs):
        json_body = safely_load_json_string(request.body)
        report_type = self.security_report_type(json_body)
        if report_type is None:
            raise APIError('Unrecognized security report type')
        interface = get_interface(report_type)

        try:
            instance = interface.from_raw(json_body)
        except jsonschema.ValidationError as e:
            raise APIError('Invalid security report: %s' %
                           str(e).splitlines()[0])

        # Do origin check based on the `document-uri` key as explained in `_dispatch`.
        origin = instance.get_origin()
        if not is_valid_origin(origin, project):
            if project:
                tsdb.incr(tsdb.models.project_total_received_cors, project.id)
            raise APIForbidden('Invalid origin')

        data = {
            'interface': interface.path,
            'report': instance,
            'release': request.GET.get('sentry_release'),
            'environment': request.GET.get('sentry_environment'),
        }

        response_or_event_id = self.process(request,
                                            project=project,
                                            helper=helper,
                                            data=data,
                                            **kwargs)
        if isinstance(response_or_event_id, HttpResponse):
            return response_or_event_id
        return HttpResponse(content_type='application/javascript', status=201)
Ejemplo n.º 6
0
    def should_filter(self):
        """
        returns (result: bool, reason: string or None)
        Result is True if an event should be filtered
        The reason for filtering is passed along as a string
        so that we can store it in metrics
        """
        for name in SECURITY_REPORT_INTERFACES:
            if name in self._data:
                interface = get_interface(name)
                if interface.to_python(self._data[name]).should_filter(self._project):
                    return (True, FilterStatKeys.INVALID_CSP)

        if self._client_ip and not is_valid_ip(self.project_config, self._client_ip):
            return (True, FilterStatKeys.IP_ADDRESS)

        release = self._data.get("release")
        if release and not is_valid_release(self.project_config, release):
            return (True, FilterStatKeys.RELEASE_VERSION)

        error_message = (
            get_path(self._data, "logentry", "formatted")
            or get_path(self._data, "logentry", "message")
            or ""
        )
        if error_message and not is_valid_error_message(self.project_config, error_message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

        for exc in get_path(self._data, "exception", "values", filter=True, default=[]):
            message = u": ".join(filter(None, map(exc.get, ["type", "value"])))
            if message and not is_valid_error_message(self.project_config, message):
                return (True, FilterStatKeys.ERROR_MESSAGE)

        return should_filter_event(self.project_config, self._data)
Ejemplo n.º 7
0
    def should_filter(self, project, data, ip_address=None):
        for name in self.report_interfaces:
            if name in data:
                interface = get_interface(name)
                if interface.to_python(data[name]).should_filter(project):
                    return (True, FilterStatKeys.INVALID_CSP)

        return super(SecurityApiHelper, self).should_filter(project, data, ip_address)
    def should_filter(self, project, data, ip_address=None):
        for name in self.report_interfaces:
            if name in data:
                interface = get_interface(name)
                if interface.to_python(data[name]).should_filter(project):
                    return (True, FilterStatKeys.INVALID_CSP)

        return super(SecurityApiHelper, self).should_filter(project, data, ip_address)
Ejemplo n.º 9
0
    def post(self, request, project, helper, key, project_config, **kwargs):
        json_body = safely_load_json_string(request.body)
        report_type = self.security_report_type(json_body)
        if report_type is None:
            track_outcome(
                project_config.organization_id,
                project_config.project_id,
                key.id,
                Outcome.INVALID,
                "security_report_type",
            )
            raise APIError("Unrecognized security report type")
        interface = get_interface(report_type)

        try:
            instance = interface.from_raw(json_body)
        except jsonschema.ValidationError as e:
            track_outcome(
                project_config.organization_id,
                project_config.project_id,
                key.id,
                Outcome.INVALID,
                "security_report",
            )
            raise APIError("Invalid security report: %s" % str(e).splitlines()[0])

        # Do origin check based on the `document-uri` key as explained in `_dispatch`.
        origin = instance.get_origin()
        if not is_valid_origin(origin, project):
            track_outcome(
                project_config.organization_id,
                project_config.project_id,
                key.id,
                Outcome.INVALID,
                FilterStatKeys.CORS,
            )
            raise APIForbidden("Invalid origin")

        data = {
            "interface": interface.path,
            "report": instance,
            "release": request.GET.get("sentry_release"),
            "environment": request.GET.get("sentry_environment"),
        }

        self.process(
            request,
            project=project,
            helper=helper,
            data=data,
            key=key,
            project_config=project_config,
            **kwargs
        )

        return HttpResponse(content_type="application/javascript", status=201)
Ejemplo n.º 10
0
    def process_csp_report(self):
        """Only called from the CSP report endpoint."""
        data = self._data

        try:
            interface = get_interface(data.pop('interface'))
            report = data.pop('report')
        except KeyError:
            raise APIForbidden('No report or interface data')

        # To support testing, we can either accept a built interface instance, or the raw data in
        # which case we build the instance ourselves
        try:
            instance = (
                report if isinstance(report, interface) else interface.from_raw(report)
            )
        except jsonschema.ValidationError as e:
            raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

        def clean(d):
            return dict(filter(lambda x: x[1], d.items()))

        data.update(
            {
                'logger': 'csp',
                'message': instance.get_message(),
                'culprit': instance.get_culprit(),
                instance.path: instance.to_json(),
                'tags': instance.get_tags(),
                'errors': [],
                'user': {'ip_address': self._client_ip},
                # Construct a faux Http interface based on the little information we have
                # This is a bit weird, since we don't have nearly enough
                # information to create an Http interface, but
                # this automatically will pick up tags for the User-Agent
                # which is actually important here for CSP
                'request': {
                    'url': instance.get_origin(),
                    'headers': clean(
                        {
                            'User-Agent': self._user_agent,
                            'Referer': instance.get_referrer(),
                        }
                    ),
                },
            }
        )

        self._data = data
Ejemplo n.º 11
0
    def process_csp_report(self):
        """Only called from the CSP report endpoint."""
        data = self._data

        try:
            interface = get_interface(data.pop('interface'))
            report = data.pop('report')
        except KeyError:
            raise APIForbidden('No report or interface data')

        # To support testing, we can either accept a built interface instance, or the raw data in
        # which case we build the instance ourselves
        try:
            instance = (
                report if isinstance(report, interface) else interface.from_raw(report)
            )
        except jsonschema.ValidationError as e:
            raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

        def clean(d):
            return dict(filter(lambda x: x[1], d.items()))

        data.update(
            {
                'logger': 'csp',
                'message': instance.get_message(),
                'culprit': instance.get_culprit(),
                instance.path: instance.to_json(),
                'tags': instance.get_tags(),
                'errors': [],
                'user': {'ip_address': self._client_ip},
                # Construct a faux Http interface based on the little information we have
                # This is a bit weird, since we don't have nearly enough
                # information to create an Http interface, but
                # this automatically will pick up tags for the User-Agent
                # which is actually important here for CSP
                'request': {
                    'url': instance.get_origin(),
                    'headers': clean(
                        {
                            'User-Agent': self._user_agent,
                            'Referer': instance.get_referrer(),
                        }
                    ),
                },
            }
        )

        self._data = data
Ejemplo n.º 12
0
    def process_csp_report(self):
        """Only called from the CSP report endpoint."""
        data = self._data

        try:
            interface = get_interface(data.pop("interface"))
            report = data.pop("report")
        except KeyError:
            raise APIForbidden("No report or interface data")

        # To support testing, we can either accept a built interface instance, or the raw data in
        # which case we build the instance ourselves
        try:
            instance = report if isinstance(
                report, interface) else interface.from_raw(report)
        except jsonschema.ValidationError as e:
            raise APIError("Invalid security report: %s" %
                           str(e).splitlines()[0])

        def clean(d):
            return dict([x for x in d.items() if x[1]])

        data.update({
            "logger": "csp",
            "message": instance.get_message(),
            "culprit": instance.get_culprit(),
            instance.path: instance.to_json(),
            "tags": instance.get_tags(),
            "errors": [],
            "user": {
                "ip_address": self._client_ip
            },
            # Construct a faux Http interface based on the little information we have
            # This is a bit weird, since we don't have nearly enough
            # information to create an Http interface, but
            # this automatically will pick up tags for the User-Agent
            # which is actually important here for CSP
            "request": {
                "url":
                instance.get_origin(),
                "headers":
                clean({
                    "User-Agent": self._user_agent,
                    "Referer": instance.get_referrer()
                }),
            },
        })

        self._data = data
Ejemplo n.º 13
0
    def interfaces(self):
        result = []
        for key, data in self.data.iteritems():
            try:
                cls = get_interface(key)
            except ValueError:
                continue

            value = safe_execute(cls.to_python, data)
            if not value:
                continue

            result.append((key, value))

        return SortedDict((k, v) for k, v in sorted(result, key=lambda x: x[1].get_score(), reverse=True))
Ejemplo n.º 14
0
    def interfaces(self):
        result = []
        for key, data in self.data.iteritems():
            try:
                cls = get_interface(key)
            except ValueError:
                continue

            value = safe_execute(cls.to_python, data)
            if not value:
                continue

            result.append((key, value))

        return SortedDict((k, v) for k, v in sorted(result, key=lambda x: x[1].get_score(), reverse=True))
Ejemplo n.º 15
0
Archivo: api.py Proyecto: yaoqi/sentry
    def post(self, request, project, helper, key, **kwargs):
        json_body = safely_load_json_string(request.body)
        report_type = self.security_report_type(json_body)
        if report_type is None:
            track_outcome(
                project.organization_id,
                project.id,
                key.id,
                Outcome.INVALID,
                "security_report_type")
            raise APIError('Unrecognized security report type')
        interface = get_interface(report_type)

        try:
            instance = interface.from_raw(json_body)
        except jsonschema.ValidationError as e:
            track_outcome(
                project.organization_id,
                project.id,
                key.id,
                Outcome.INVALID,
                "security_report")
            raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

        # Do origin check based on the `document-uri` key as explained in `_dispatch`.
        origin = instance.get_origin()
        if not is_valid_origin(origin, project):
            if project:
                track_outcome(
                    project.organization_id,
                    project.id,
                    key.id,
                    Outcome.INVALID,
                    FilterStatKeys.CORS)
            raise APIForbidden('Invalid origin')

        data = {
            'interface': interface.path,
            'report': instance,
            'release': request.GET.get('sentry_release'),
            'environment': request.GET.get('sentry_environment'),
        }

        self.process(request, project=project, helper=helper, data=data, key=key, **kwargs)
        return HttpResponse(content_type='application/javascript', status=201)
Ejemplo n.º 16
0
    def should_filter(self):
        '''
        returns (result: bool, reason: string or None)
        Result is True if an event should be filtered
        The reason for filtering is passed along as a string
        so that we can store it in metrics
        '''
        for name in SECURITY_REPORT_INTERFACES:
            if name in self._data:
                interface = get_interface(name)
                if interface.to_python(self._data[name]).should_filter(self._project):
                    return (True, FilterStatKeys.INVALID_CSP)

        if self._client_ip and not is_valid_ip(self._project, self._client_ip):
            return (True, FilterStatKeys.IP_ADDRESS)

        release = self._data.get('release')
        if release and not is_valid_release(self._project, release):
            return (True, FilterStatKeys.RELEASE_VERSION)

        error_message = get_path(self._data, 'logentry', 'formatted') \
            or get_path(self._data, 'logentry', 'message') \
            or ''
        if error_message and not is_valid_error_message(self._project, error_message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

        for exc in get_path(self._data, 'exception', 'values', filter=True, default=[]):
            message = u': '.join(
                filter(None, map(exc.get, ['type', 'value']))
            )
            if message and not is_valid_error_message(self._project, message):
                return (True, FilterStatKeys.ERROR_MESSAGE)

        for filter_cls in filters.all():
            filter_obj = filter_cls(self._project)
            if filter_obj.is_enabled() and filter_obj.test(self._data):
                return (True, six.text_type(filter_obj.id))

        return (False, None)
Ejemplo n.º 17
0
 def test_allows_http(self):
     from sentry.interfaces.http import Http
     result = get_interface('sentry.interfaces.Http')
     assert result is Http
     result = get_interface('request')
     assert result is Http
Ejemplo n.º 18
0
 def test_does_not_let_through_disallowed_name(self):
     with self.assertRaises(ValueError):
         get_interface('subprocess')
Ejemplo n.º 19
0
    def normalize(self, request_env=None):
        request_env = request_env or {}
        data = self.data
        errors = data['errors'] = []

        # Ignore event meta data for now.
        data.pop('_meta', None)

        # Before validating with a schema, attempt to cast values to their desired types
        # so that the schema doesn't have to take every type variation into account.
        text = six.text_type
        fp_types = six.string_types + six.integer_types + (float, )

        def to_values(v):
            return {'values': v} if v and isinstance(v, (tuple, list)) else v

        def stringify(f):
            if isinstance(f, float):
                return text(int(f)) if abs(f) < (1 << 53) else None
            return text(f)

        casts = {
            'environment': lambda v: text(v) if v is not None else v,
            'fingerprint': lambda v: list(x for x in map(stringify, v) if x is not None) if isinstance(v, list) and all(isinstance(f, fp_types) for f in v) else v,
            'release': lambda v: text(v) if v is not None else v,
            'dist': lambda v: text(v).strip() if v is not None else v,
            'time_spent': lambda v: int(v) if v is not None else v,
            'tags': lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip()) for (v_k, v_v) in dict(v).items()],
            'timestamp': lambda v: process_timestamp(v),
            'platform': lambda v: v if v in VALID_PLATFORMS else 'other',
            'logentry': lambda v: v if isinstance(v, dict) else {'message': v},

            # These can be sent as lists and need to be converted to {'values': [...]}
            'exception': to_values,
            'breadcrumbs': to_values,
            'threads': to_values,
        }

        for c in casts:
            if c in data:
                try:
                    data[c] = casts[c](data[c])
                except InvalidTimestamp as it:
                    errors.append({'type': it.args[0], 'name': c, 'value': data[c]})
                    del data[c]
                except Exception as e:
                    errors.append({'type': EventError.INVALID_DATA, 'name': c, 'value': data[c]})
                    del data[c]

        # raw 'message' is coerced to the Message interface, as its used for pure index of
        # searchable strings. If both a raw 'message' and a Message interface exist, try and
        # add the former as the 'formatted' attribute of the latter.
        # See GH-3248
        msg_str = data.pop('message', None)
        if msg_str:
            msg_if = data.get('logentry')
            msg_meta = data.get('_meta', {}).get('message')

            if not msg_if:
                msg_if = data['logentry'] = {'message': msg_str}
                if msg_meta:
                    data.setdefault('_meta', {}).setdefault('logentry', {})['message'] = msg_meta

            if msg_if.get('message') != msg_str:
                if not msg_if.get('formatted'):
                    msg_if['formatted'] = msg_str
                    if msg_meta:
                        data.setdefault('_meta', {}).setdefault(
                            'logentry', {})['formatted'] = msg_meta

        # Fill in ip addresses marked as {{auto}}
        client_ip = request_env.get('client_ip')
        if client_ip:
            if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['sentry.interfaces.Http']['env']['REMOTE_ADDR'] = client_ip

            if get_path(data, ['request', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['request']['env']['REMOTE_ADDR'] = client_ip

            if get_path(data, ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
                data['sentry.interfaces.User']['ip_address'] = client_ip

            if get_path(data, ['user', 'ip_address']) == '{{auto}}':
                data['user']['ip_address'] = client_ip

        # Validate main event body and tags against schema.
        # XXX(ja): jsonschema does not like CanonicalKeyDict, so we need to pass
        #          in the inner data dict.
        is_valid, event_errors = validate_and_default_interface(data.data, 'event')
        errors.extend(event_errors)
        if 'tags' in data:
            is_valid, tag_errors = validate_and_default_interface(data['tags'], 'tags', name='tags')
            errors.extend(tag_errors)

        # Validate interfaces
        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.logger.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.logger.debug('Ignored unknown attribute: %s', k)
                errors.append({'type': EventError.INVALID_ATTRIBUTE, 'name': k})
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                log = self.logger.debug if isinstance(
                    e, InterfaceValidationError) else self.logger.error
                log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
                errors.append({'type': EventError.INVALID_DATA, 'name': k, 'value': value})

        # Additional data coercion and defaulting
        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
        data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

        if data.get('dist') and not data.get('release'):
            data['dist'] = None

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        # TODO (alex) can this all be replaced by utcnow?
        # it looks like the only time that this would even be hit is when timestamp
        # is not defined, as the earlier process_timestamp already converts existing
        # timestamps to floats.
        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        data.setdefault('checksum', None)
        data.setdefault('culprit', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('fingerprint', None)
        data.setdefault('logger', DEFAULT_LOGGER_NAME)
        data.setdefault('platform', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('tags', [])
        data.setdefault('transaction', None)

        # Fix case where legacy apps pass 'environment' as a tag
        # instead of a top level key.
        # TODO (alex) save() just reinserts the environment into the tags
        if not data.get('environment'):
            tagsdict = dict(data['tags'])
            if 'environment' in tagsdict:
                data['environment'] = tagsdict['environment']
                del tagsdict['environment']
                data['tags'] = tagsdict.items()

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key
        data['version'] = self.version

        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        # Exception mechanism needs SDK information to resolve proper names in
        # exception meta (such as signal names). "SDK Information" really means
        # the operating system version the event was generated on. Some
        # normalization still works without sdk_info, such as mach_exception
        # names (they can only occur on macOS).
        if exception:
            sdk_info = get_sdk_from_event(data)
            for ex in exception['values']:
                if 'mechanism' in ex:
                    normalize_mechanism_meta(ex['mechanism'], sdk_info)

        # If there is no User ip_addres, update it either from the Http interface
        # or the client_ip of the request.
        auth = request_env.get('auth')
        is_public = auth and auth.is_public
        add_ip_platforms = ('javascript', 'cocoa', 'objc')

        http_ip = data.get('sentry.interfaces.Http', {}).get('env', {}).get('REMOTE_ADDR')
        if http_ip:
            data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', http_ip)
        elif client_ip and (is_public or data.get('platform') in add_ip_platforms):
            data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', client_ip)

        # Trim values
        data['logger'] = trim(data['logger'].strip(), 64)
        trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        return data
Ejemplo n.º 20
0
    def validate_data(self, project, data):
        # TODO(dcramer): move project out of the data packet
        data['project'] = project.id

        data.setdefault('errors', [])

        if not data.get('message'):
            data['message'] = '<no message value>'
        elif not isinstance(data['message'], six.string_types):
            raise APIForbidden('Invalid value for message')

        if data.get('culprit'):
            if not isinstance(data['culprit'], six.string_types):
                raise APIForbidden('Invalid value for culprit')

        if not data.get('event_id'):
            data['event_id'] = uuid.uuid4().hex
        elif not isinstance(data['event_id'], six.string_types):
            raise APIForbidden('Invalid value for event_id')

        if len(data['event_id']) > 32:
            self.log.info(
                'Discarded value for event_id due to length (%d chars)',
                len(data['event_id']))
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'event_id',
                'value': data['event_id'],
            })
            data['event_id'] = uuid.uuid4().hex

        if 'timestamp' in data:
            try:
                self._process_data_timestamp(data)
            except InvalidTimestamp as e:
                self.log.info('Discarded invalid value for timestamp: %r',
                              data['timestamp'],
                              exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'timestamp',
                    'value': data['timestamp'],
                })
                del data['timestamp']

        if data.get('modules') and type(data['modules']) != dict:
            self.log.info('Discarded invalid type for modules: %s',
                          type(data['modules']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'modules',
                'value': data['modules'],
            })
            del data['modules']

        if data.get('extra') is not None and type(data['extra']) != dict:
            self.log.info('Discarded invalid type for extra: %s',
                          type(data['extra']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'extra',
                'value': data['extra'],
            })
            del data['extra']

        if data.get('tags') is not None:
            if type(data['tags']) == dict:
                data['tags'] = data['tags'].items()
            elif not isinstance(data['tags'], (list, tuple)):
                self.log.info('Discarded invalid type for tags: %s',
                              type(data['tags']))
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': data['tags'],
                })
                del data['tags']

        if data.get('tags'):
            # remove any values which are over 32 characters
            tags = []
            for pair in data['tags']:
                try:
                    k, v = pair
                except ValueError:
                    self.log.info('Discarded invalid tag value: %r', pair)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                if not isinstance(k, six.string_types):
                    try:
                        k = six.text_type(k)
                    except Exception:
                        self.log.info('Discarded invalid tag key: %r', type(k))
                        data['errors'].append({
                            'type': EventError.INVALID_DATA,
                            'name': 'tags',
                            'value': pair,
                        })
                        continue

                if not isinstance(v, six.string_types):
                    try:
                        v = six.text_type(v)
                    except Exception:
                        self.log.info('Discarded invalid tag value: %s=%r', k,
                                      type(v))
                        data['errors'].append({
                            'type': EventError.INVALID_DATA,
                            'name': 'tags',
                            'value': pair,
                        })
                        continue

                if len(k) > MAX_TAG_KEY_LENGTH or len(
                        v) > MAX_TAG_VALUE_LENGTH:
                    self.log.info('Discarded invalid tag: %s=%s', k, v)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue
                tags.append((k, v))
            data['tags'] = tags

        for k in data.keys():
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.log.info('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.log.info('Ignored unknown attribute: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k,
                })
                continue

            if type(value) != dict:
                # HACK(dcramer): the exception interface supports a list as the
                # value. We should change this in a new protocol version.
                if type(value) in (list, tuple):
                    value = {'values': value}
                else:
                    self.log.info('Invalid parameter for value: %s (%r)', k,
                                  type(value))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': k,
                        'value': value,
                    })
                    continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                if isinstance(e, AssertionError):
                    log = self.log.info
                else:
                    log = self.log.error
                log('Discarded invalid value for interface: %s (%r)',
                    k,
                    value,
                    exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })

        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, six.string_types) and not level.isdigit():
            # assume it's something like 'warning'
            try:
                data['level'] = LOG_LEVEL_REVERSE_MAP[level]
            except KeyError as e:
                self.log.info('Discarded invalid logger value: %s', level)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'level',
                    'value': level,
                })
                data['level'] = LOG_LEVEL_REVERSE_MAP.get(
                    DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

        if data.get('release'):
            data['release'] = unicode(data['release'])
            if len(data['release']) > 64:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'release',
                    'value': data['release'],
                })
                del data['release']

        return data
Ejemplo n.º 21
0
    def normalize(self, request_env=None):
        request_env = request_env or {}
        data = self.data
        errors = data['errors'] = []

        # Before validating with a schema, attempt to cast values to their desired types
        # so that the schema doesn't have to take every type variation into account.
        text = six.text_type
        fp_types = six.string_types + six.integer_types + (float, )

        def to_values(v):
            return {'values': v} if v and isinstance(v, (tuple, list)) else v

        def convert_fingerprint(values):
            rv = values[:]
            bad_float = False
            for idx, item in enumerate(rv):
                if isinstance(item, float) and \
                   (abs(item) >= (1 << 53) or int(item) != item):
                    bad_float = True
                rv[idx] = text(item)
            if bad_float:
                metrics.incr(
                    'events.bad_float_fingerprint',
                    skip_internal=True,
                    tags={
                        'project_id': data.get('project'),
                    },
                )
            return rv

        casts = {
            'environment':
            lambda v: text(v) if v is not None else v,
            'fingerprint':
            lambda v: convert_fingerprint(v)
            if isinstance(v, list) and all(isinstance(f, fp_types)
                                           for f in v) else v,
            'release':
            lambda v: text(v) if v is not None else v,
            'dist':
            lambda v: text(v).strip() if v is not None else v,
            'time_spent':
            lambda v: int(v) if v is not None else v,
            'tags':
            lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip())
                       for (v_k, v_v) in dict(v).items()],
            'timestamp':
            lambda v: process_timestamp(v),
            'platform':
            lambda v: v if v in VALID_PLATFORMS else 'other',
            'sentry.interfaces.Message':
            lambda v: v if isinstance(v, dict) else {
                'message': v
            },

            # These can be sent as lists and need to be converted to {'values': [...]}
            'exception':
            to_values,
            'sentry.interfaces.Exception':
            to_values,
            'breadcrumbs':
            to_values,
            'sentry.interfaces.Breadcrumbs':
            to_values,
            'threads':
            to_values,
            'sentry.interfaces.Threads':
            to_values,
        }

        for c in casts:
            if c in data:
                try:
                    data[c] = casts[c](data[c])
                except InvalidTimestamp as it:
                    errors.append({
                        'type': it.args[0],
                        'name': c,
                        'value': data[c]
                    })
                    del data[c]
                except Exception as e:
                    errors.append({
                        'type': EventError.INVALID_DATA,
                        'name': c,
                        'value': data[c]
                    })
                    del data[c]

        # raw 'message' is coerced to the Message interface, as its used for pure index of
        # searchable strings. If both a raw 'message' and a Message interface exist, try and
        # add the former as the 'formatted' attribute of the latter.
        # See GH-3248
        msg_str = data.pop('message', None)
        if msg_str:
            msg_if = data.setdefault('sentry.interfaces.Message',
                                     {'message': msg_str})
            if msg_if.get('message') != msg_str:
                msg_if.setdefault('formatted', msg_str)

        # Fill in ip addresses marked as {{auto}}
        client_ip = request_env.get('client_ip')
        if client_ip:
            if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR'
                               ]) == '{{auto}}':
                data['sentry.interfaces.Http']['env'][
                    'REMOTE_ADDR'] = client_ip

            if get_path(data, ['request', 'env', 'REMOTE_ADDR']) == '{{auto}}':
                data['request']['env']['REMOTE_ADDR'] = client_ip

            if get_path(
                    data,
                ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
                data['sentry.interfaces.User']['ip_address'] = client_ip

            if get_path(data, ['user', 'ip_address']) == '{{auto}}':
                data['user']['ip_address'] = client_ip

        # Validate main event body and tags against schema
        is_valid, event_errors = validate_and_default_interface(data, 'event')
        errors.extend(event_errors)
        if 'tags' in data:
            is_valid, tag_errors = validate_and_default_interface(data['tags'],
                                                                  'tags',
                                                                  name='tags')
            errors.extend(tag_errors)

        # Validate interfaces
        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.logger.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.logger.debug('Ignored unknown attribute: %s', k)
                errors.append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k
                })
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                log = self.logger.debug if isinstance(
                    e, InterfaceValidationError) else self.logger.error
                log('Discarded invalid value for interface: %s (%r)',
                    k,
                    value,
                    exc_info=True)
                errors.append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value
                })

        # Additional data coercion and defaulting
        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, int) or (isinstance(level, six.string_types)
                                      and level.isdigit()):
            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
        data['level'] = LOG_LEVELS_MAP.get(level,
                                           LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

        if data.get('dist') and not data.get('release'):
            data['dist'] = None

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        # TODO (alex) can this all be replaced by utcnow?
        # it looks like the only time that this would even be hit is when timestamp
        # is not defined, as the earlier process_timestamp already converts existing
        # timestamps to floats.
        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        data.setdefault('checksum', None)
        data.setdefault('culprit', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('fingerprint', None)
        data.setdefault('logger', DEFAULT_LOGGER_NAME)
        data.setdefault('platform', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('tags', [])
        data.setdefault('transaction', None)

        # Fix case where legacy apps pass 'environment' as a tag
        # instead of a top level key.
        # TODO (alex) save() just reinserts the environment into the tags
        if not data.get('environment'):
            tagsdict = dict(data['tags'])
            if 'environment' in tagsdict:
                data['environment'] = tagsdict['environment']
                del tagsdict['environment']
                data['tags'] = tagsdict.items()

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key
        data['version'] = self.version

        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        # Exception mechanism needs SDK information to resolve proper names in
        # exception meta (such as signal names). "SDK Information" really means
        # the operating system version the event was generated on. Some
        # normalization still works without sdk_info, such as mach_exception
        # names (they can only occur on macOS).
        if exception:
            sdk_info = get_sdk_from_event(data)
            for ex in exception['values']:
                if 'mechanism' in ex:
                    normalize_mechanism_meta(ex['mechanism'], sdk_info)

        # If there is no User ip_addres, update it either from the Http interface
        # or the client_ip of the request.
        auth = request_env.get('auth')
        is_public = auth and auth.is_public
        add_ip_platforms = ('javascript', 'cocoa', 'objc')

        http_ip = data.get('sentry.interfaces.Http',
                           {}).get('env', {}).get('REMOTE_ADDR')
        if http_ip:
            data.setdefault('sentry.interfaces.User',
                            {}).setdefault('ip_address', http_ip)
        elif client_ip and (is_public
                            or data.get('platform') in add_ip_platforms):
            data.setdefault('sentry.interfaces.User',
                            {}).setdefault('ip_address', client_ip)

        # Trim values
        data['logger'] = trim(data['logger'].strip(), 64)
        trim_dict(data['extra'],
                  max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        return data
Ejemplo n.º 22
0
    def normalize(self):
        data = self.data
        errors = data.get('errors', [])

        # Before validating with a schema, attempt to cast values to their desired types
        # so that the schema doesn't have to take every type variation into account.
        text = six.text_type
        fp_types = six.string_types + six.integer_types + (float, )

        def to_values(v):
            return {'values': v} if v and isinstance(v, (tuple, list)) else v

        casts = {
            'environment':
            lambda v: text(v) if v is not None else v,
            'fingerprint':
            lambda v: list(map(text, v))
            if isinstance(v, list) and all(isinstance(f, fp_types)
                                           for f in v) else v,
            'release':
            lambda v: text(v) if v is not None else v,
            'dist':
            lambda v: text(v).strip() if v is not None else v,
            'time_spent':
            lambda v: int(v) if v is not None else v,
            'tags':
            lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip())
                       for (v_k, v_v) in dict(v).items()],
            'timestamp':
            lambda v: process_timestamp(v),
            'platform':
            lambda v: v if v in VALID_PLATFORMS else 'other',

            # These can be sent as lists and need to be converted to {'values': [...]}
            'exception':
            to_values,
            'sentry.interfaces.Exception':
            to_values,
            'breadcrumbs':
            to_values,
            'sentry.interfaces.Breadcrumbs':
            to_values,
            'threads':
            to_values,
            'sentry.interfaces.Threads':
            to_values,
        }

        for c in casts:
            if c in data:
                try:
                    data[c] = casts[c](data[c])
                except Exception as e:
                    errors.append({
                        'type': EventError.INVALID_DATA,
                        'name': c,
                        'value': data[c]
                    })
                    del data[c]

        # raw 'message' is coerced to the Message interface, as its used for pure index of
        # searchable strings. If both a raw 'message' and a Message interface exist, try and
        # add the former as the 'formatted' attribute of the latter.
        # See GH-3248
        msg_str = data.pop('message', None)
        if msg_str:
            msg_if = data.setdefault('sentry.interfaces.Message',
                                     {'message': msg_str})
            if msg_if.get('message') != msg_str:
                msg_if.setdefault('formatted', msg_str)

        # Validate main event body and tags against schema
        is_valid, event_errors = validate_and_default_interface(data, 'event')
        errors.extend(event_errors)

        if 'tags' in data:
            is_valid, tag_errors = validate_and_default_interface(data['tags'],
                                                                  'tags',
                                                                  name='tags')
            errors.extend(tag_errors)

        # Validate interfaces
        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.logger.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.logger.debug('Ignored unknown attribute: %s', k)
                errors.append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k
                })
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                log = self.logger.debug if isinstance(
                    e, InterfaceValidationError) else self.logger.error
                log('Discarded invalid value for interface: %s (%r)',
                    k,
                    value,
                    exc_info=True)
                errors.append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value
                })

        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, int) or (isinstance(level, six.string_types)
                                      and level.isdigit()):
            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
        data['level'] = LOG_LEVELS_MAP.get(level,
                                           LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

        if data.get('dist') and not data.get('release'):
            data['dist'] = None

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        # TODO (alex) can this all be replaced by utcnow?
        # it looks like the only time that this would even be hit is when timestamp
        # is not defined, as the earlier process_timestamp already converts existing
        # timestamps to floats.
        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        data.setdefault('culprit', None)
        data.setdefault('transaction', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('tags', [])

        # Fix case where legacy apps pass 'environment' as a tag
        # instead of a top level key.
        # TODO (alex) save() just reinserts the environment into the tags
        if not data.get('environment'):
            tagsdict = dict(data['tags'])
            if 'environment' in tagsdict:
                data['environment'] = tagsdict['environment']
                del tagsdict['environment']
                data['tags'] = tagsdict.items()

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key
        data['version'] = self.version

        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get('env',
                                                       {}).get('REMOTE_ADDR'),
                    required=False,
                )
                if ip_address:
                    data.setdefault('sentry.interfaces.User',
                                    {}).setdefault('ip_address', ip_address)
            except ValueError:
                pass

        # Trim values
        logger = data.get('logger', DEFAULT_LOGGER_NAME)
        data['logger'] = trim(logger.strip(), 64)

        trim_dict(data['extra'],
                  max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        data['errors'] = errors

        return data
Ejemplo n.º 23
0
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get('level'), (six.string_types, int)):
            data['level'] = logging.ERROR
        elif data['level'] not in LOG_LEVELS:
            data['level'] = logging.ERROR

        if not data.get('logger'):
            data['logger'] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data['logger'].strip(), 64)
            if TagKey.is_valid_key(logger):
                data['logger'] = logger
            else:
                data['logger'] = DEFAULT_LOGGER_NAME

        if data.get('platform'):
            data['platform'] = trim(data['platform'], 64)

        current_timestamp = timezone.now()
        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = current_timestamp

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        if not data.get('event_id'):
            data['event_id'] = uuid4().hex

        data.setdefault('culprit', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('errors', [])

        tags = data.get('tags')
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = list(tags.items())
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data['tags'] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            # XXX(dcramer): many legacy apps are using the environment tag
            # rather than the key itself
            if key == 'environment' and not data.get('environment'):
                data['environment'] = value
            else:
                data['tags'].append((key, value))

        if not isinstance(data['extra'], dict):
            # throw it away
            data['extra'] = {}

        trim_dict(
            data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of validate data needs stuffed into the manager
        for key in list(iter(data)):
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                # XXX: we should consider logging this.
                pass

        # TODO(dcramer): this logic is duplicated in ``validate_data`` from
        # coreapi

        # message is coerced to an interface, as its used for pure
        # index of searchable strings
        # See GH-3248
        message = data.pop('message', None)
        if message:
            if 'sentry.interfaces.Message' not in data:
                interface = get_interface('sentry.interfaces.Message')
                try:
                    inst = interface.to_python({
                        'message': message,
                    })
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass
            elif not data['sentry.interfaces.Message'].get('formatted'):
                interface = get_interface('sentry.interfaces.Message')
                try:
                    inst = interface.to_python(dict(
                        data['sentry.interfaces.Message'],
                        formatted=message,
                    ))
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key

        data['version'] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get(
                        'env', {}).get('REMOTE_ADDR'),
                    required=False,
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault('sentry.interfaces.User', {})
                data['sentry.interfaces.User'].setdefault(
                    'ip_address', ip_address)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        return data
Ejemplo n.º 24
0
 def test_does_not_let_through_disallowed_name(self):
     with self.assertRaises(ValueError):
         get_interface('subprocess')
Ejemplo n.º 25
0
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get('level'), (six.string_types, int)):
            data['level'] = logging.ERROR
        elif data['level'] not in LOG_LEVELS:
            data['level'] = logging.ERROR

        if not data.get('logger') or not isinstance(data.get('logger'), six.string_types):
            data['logger'] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data['logger'].strip(), 64)
            if tagstore.is_valid_key(logger):
                data['logger'] = logger
            else:
                data['logger'] = DEFAULT_LOGGER_NAME

        if data.get('platform'):
            data['platform'] = trim(data['platform'], 64)

        current_timestamp = timezone.now()
        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = current_timestamp

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp
        data['received'] = float(timezone.now().strftime('%s'))

        if not data.get('event_id'):
            data['event_id'] = uuid4().hex

        data.setdefault('culprit', None)
        data.setdefault('transaction', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('dist', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('errors', [])

        tags = data.get('tags')
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = list(tags.items())
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data['tags'] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            # XXX(dcramer): many legacy apps are using the environment tag
            # rather than the key itself
            if key == 'environment' and not data.get('environment'):
                data['environment'] = value
            else:
                data['tags'].append((key, value))

        if not isinstance(data['extra'], dict):
            # throw it away
            data['extra'] = {}

        trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of validate data needs stuffed into the manager
        for key in list(iter(data)):
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                # XXX: we should consider logging this.
                pass

        # TODO(dcramer): this logic is duplicated in ``validate_data`` from
        # coreapi

        # message is coerced to an interface, as its used for pure
        # index of searchable strings
        # See GH-3248
        message = data.pop('message', None)
        if message:
            if 'sentry.interfaces.Message' not in data:
                interface = get_interface('sentry.interfaces.Message')
                try:
                    inst = interface.to_python({
                        'message': message,
                    })
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass
            elif not data['sentry.interfaces.Message'].get('formatted'):
                interface = get_interface('sentry.interfaces.Message')
                try:
                    inst = interface.to_python(
                        dict(
                            data['sentry.interfaces.Message'],
                            formatted=message,
                        )
                    )
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data['type'] = eventtypes.infer(data).key

        data['version'] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get('env', {}).get('REMOTE_ADDR'),
                    required=False,
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault('sentry.interfaces.User', {})
                data['sentry.interfaces.User'].setdefault('ip_address', ip_address)

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['transaction']:
            data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

        return data
Ejemplo n.º 26
0
 def test_allows_http(self):
     from sentry.interfaces.http import Http
     result = get_interface('sentry.interfaces.Http')
     assert result is Http
     result = get_interface('request')
     assert result is Http
Ejemplo n.º 27
0
    def validate_data(self, project, data):
        # TODO(dcramer): move project out of the data packet
        data["project"] = project.id

        data["errors"] = []

        if not data.get("message"):
            data["message"] = "<no message value>"
        elif not isinstance(data["message"], six.string_types):
            raise APIForbidden("Invalid value for message")

        if data.get("culprit"):
            if not isinstance(data["culprit"], six.string_types):
                raise APIForbidden("Invalid value for culprit")

        if not data.get("event_id"):
            data["event_id"] = uuid.uuid4().hex
        elif not isinstance(data["event_id"], six.string_types):
            raise APIForbidden("Invalid value for event_id")

        if len(data["event_id"]) > 32:
            self.log.info("Discarded value for event_id due to length (%d chars)", len(data["event_id"]))
            data["errors"].append({"type": EventError.VALUE_TOO_LONG, "name": "event_id", "value": data["event_id"]})
            data["event_id"] = uuid.uuid4().hex

        if "timestamp" in data:
            try:
                self._process_data_timestamp(data)
            except InvalidTimestamp as e:
                self.log.info("Discarded invalid value for timestamp: %r", data["timestamp"], exc_info=True)
                data["errors"].append(
                    {"type": EventError.INVALID_DATA, "name": "timestamp", "value": data["timestamp"]}
                )
                del data["timestamp"]

        if "fingerprint" in data:
            try:
                self._process_fingerprint(data)
            except InvalidFingerprint as e:
                self.log.info("Discarded invalid value for fingerprint: %r", data["fingerprint"], exc_info=True)
                data["errors"].append(
                    {"type": EventError.INVALID_DATA, "name": "fingerprint", "value": data["fingerprint"]}
                )

        if "platform" not in data or data["platform"] not in VALID_PLATFORMS:
            data["platform"] = "other"

        if data.get("modules") and type(data["modules"]) != dict:
            self.log.info("Discarded invalid type for modules: %s", type(data["modules"]))
            data["errors"].append({"type": EventError.INVALID_DATA, "name": "modules", "value": data["modules"]})
            del data["modules"]

        if data.get("extra") is not None and type(data["extra"]) != dict:
            self.log.info("Discarded invalid type for extra: %s", type(data["extra"]))
            data["errors"].append({"type": EventError.INVALID_DATA, "name": "extra", "value": data["extra"]})
            del data["extra"]

        if data.get("tags") is not None:
            if type(data["tags"]) == dict:
                data["tags"] = data["tags"].items()
            elif not isinstance(data["tags"], (list, tuple)):
                self.log.info("Discarded invalid type for tags: %s", type(data["tags"]))
                data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": data["tags"]})
                del data["tags"]

        if data.get("tags"):
            # remove any values which are over 32 characters
            tags = []
            for pair in data["tags"]:
                try:
                    k, v = pair
                except ValueError:
                    self.log.info("Discarded invalid tag value: %r", pair)
                    data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                    continue

                if not isinstance(k, six.string_types):
                    try:
                        k = six.text_type(k)
                    except Exception:
                        self.log.info("Discarded invalid tag key: %r", type(k))
                        data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                        continue

                if not isinstance(v, six.string_types):
                    try:
                        v = six.text_type(v)
                    except Exception:
                        self.log.info("Discarded invalid tag value: %s=%r", k, type(v))
                        data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                        continue

                if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                    self.log.info("Discarded invalid tag: %s=%s", k, v)
                    data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                    continue

                # support tags with spaces by converting them
                k = k.replace(" ", "-")

                if not TagKey.is_valid_key(k):
                    self.log.info("Discarded invalid tag key: %s", k)
                    data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                    continue

                tags.append((k, v))
            data["tags"] = tags

        for k in data.keys():
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.log.info("Ignored empty interface value: %s", k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.log.info("Ignored unknown attribute: %s", k)
                data["errors"].append({"type": EventError.INVALID_ATTRIBUTE, "name": k})
                continue

            if type(value) != dict:
                # HACK(dcramer): the exception interface supports a list as the
                # value. We should change this in a new protocol version.
                if type(value) in (list, tuple):
                    value = {"values": value}
                else:
                    self.log.info("Invalid parameter for value: %s (%r)", k, type(value))
                    data["errors"].append({"type": EventError.INVALID_DATA, "name": k, "value": value})
                    continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                if isinstance(e, InterfaceValidationError):
                    log = self.log.info
                else:
                    log = self.log.error
                log("Discarded invalid value for interface: %s (%r)", k, value, exc_info=True)
                data["errors"].append({"type": EventError.INVALID_DATA, "name": k, "value": value})

        level = data.get("level") or DEFAULT_LOG_LEVEL
        if isinstance(level, six.string_types) and not level.isdigit():
            # assume it's something like 'warning'
            try:
                data["level"] = LOG_LEVEL_REVERSE_MAP[level]
            except KeyError as e:
                self.log.info("Discarded invalid logger value: %s", level)
                data["errors"].append({"type": EventError.INVALID_DATA, "name": "level", "value": level})
                data["level"] = LOG_LEVEL_REVERSE_MAP.get(DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

        if data.get("release"):
            data["release"] = unicode(data["release"])
            if len(data["release"]) > 64:
                data["errors"].append({"type": EventError.VALUE_TOO_LONG, "name": "release", "value": data["release"]})
                del data["release"]

        return data
Ejemplo n.º 28
0
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get("level"), (six.string_types, int)):
            data["level"] = logging.ERROR
        elif data["level"] not in LOG_LEVELS:
            data["level"] = logging.ERROR

        if not data.get("logger"):
            data["logger"] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data["logger"].strip(), 64)
            if TagKey.is_valid_key(logger):
                data["logger"] = logger
            else:
                data["logger"] = DEFAULT_LOGGER_NAME

        if data.get("platform"):
            data["platform"] = trim(data["platform"], 64)

        current_timestamp = timezone.now()
        timestamp = data.get("timestamp")
        if not timestamp:
            timestamp = current_timestamp

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime("%s"))

        data["timestamp"] = timestamp
        data["received"] = float(timezone.now().strftime("%s"))

        if not data.get("event_id"):
            data["event_id"] = uuid4().hex

        data.setdefault("culprit", None)
        data.setdefault("server_name", None)
        data.setdefault("site", None)
        data.setdefault("checksum", None)
        data.setdefault("fingerprint", None)
        data.setdefault("platform", None)
        data.setdefault("environment", None)
        data.setdefault("extra", {})
        data.setdefault("errors", [])

        tags = data.get("tags")
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = tags.items()
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data["tags"] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            data["tags"].append((key, value))

            # XXX(dcramer): many legacy apps are using the environment tag
            # rather than the key itself
            if key == "environment" and not data.get("environment"):
                data["environment"] = value

        if not isinstance(data["extra"], dict):
            # throw it away
            data["extra"] = {}

        trim_dict(data["extra"], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of validate data needs stuffed into the manager
        for key in data.keys():
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                # XXX: we should consider logging this.
                pass

        # TODO(dcramer): this logic is duplicated in ``validate_data`` from
        # coreapi

        # message is coerced to an interface, as its used for pure
        # index of searchable strings
        # See GH-3248
        message = data.pop("message", None)
        if message:
            if "sentry.interfaces.Message" not in data:
                interface = get_interface("sentry.interfaces.Message")
                try:
                    inst = interface.to_python({"message": message})
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass
            elif not data["sentry.interfaces.Message"].get("formatted"):
                interface = get_interface("sentry.interfaces.Message")
                try:
                    inst = interface.to_python(dict(data["sentry.interfaces.Message"], formatted=message))
                    data[inst.get_path()] = inst.to_json()
                except Exception:
                    pass

        # the SDKs currently do not describe event types, and we must infer
        # them from available attributes
        data["type"] = eventtypes.infer(data).key

        data["version"] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get("sentry.interfaces.Exception")
        stacktrace = data.get("sentry.interfaces.Stacktrace")
        if exception and len(exception["values"]) == 1 and stacktrace:
            exception["values"][0]["stacktrace"] = stacktrace
            del data["sentry.interfaces.Stacktrace"]

        if "sentry.interfaces.Http" in data:
            try:
                ip_address = validate_ip(
                    data["sentry.interfaces.Http"].get("env", {}).get("REMOTE_ADDR"), required=False
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault("sentry.interfaces.User", {})
                data["sentry.interfaces.User"].setdefault("ip_address", ip_address)

        if data["culprit"]:
            data["culprit"] = trim(data["culprit"], MAX_CULPRIT_LENGTH)

        return data
Ejemplo n.º 29
0
def validate_data(project, data, client=None):
    # TODO(dcramer): move project out of the data packet
    data['project'] = project.id

    if not data.get('message'):
        data['message'] = '<no message value>'
    elif not isinstance(data['message'], six.string_types):
        raise APIError('Invalid value for message')
    elif len(data['message']) > settings.SENTRY_MAX_MESSAGE_LENGTH:
        logger.info(
            'Truncated value for message due to length (%d chars)',
            len(data['message']), **client_metadata(client, project))
        data['message'] = truncatechars(
            data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)

    if data.get('culprit'):
        if not isinstance(data['culprit'], six.string_types):
            raise APIError('Invalid value for culprit')
        logger.info(
            'Truncated value for culprit due to length (%d chars)',
            len(data['culprit']), **client_metadata(client, project))
        data['culprit'] = truncatechars(data['culprit'], MAX_CULPRIT_LENGTH)

    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex
    elif not isinstance(data['event_id'], six.string_types):
        raise APIError('Invalid value for event_id')
    if len(data['event_id']) > 32:
        logger.info(
            'Discarded value for event_id due to length (%d chars)',
            len(data['event_id']), **client_metadata(client, project))
        data['event_id'] = uuid.uuid4().hex

    if 'timestamp' in data:
        try:
            process_data_timestamp(data)
        except InvalidTimestamp as e:
            # Log the error, remove the timestamp, and continue
            logger.info(
                'Discarded invalid value for timestamp: %r', data['timestamp'],
                **client_metadata(client, project, exception=e))
            del data['timestamp']

    if data.get('modules') and type(data['modules']) != dict:
        logger.info(
            'Discarded invalid type for modules: %s',
            type(data['modules']), **client_metadata(client, project))
        del data['modules']

    if data.get('extra') is not None and type(data['extra']) != dict:
        logger.info(
            'Discarded invalid type for extra: %s',
            type(data['extra']), **client_metadata(client, project))
        del data['extra']

    if data.get('tags') is not None:
        if type(data['tags']) == dict:
            data['tags'] = data['tags'].items()
        elif not isinstance(data['tags'], (list, tuple)):
            logger.info(
                'Discarded invalid type for tags: %s',
                type(data['tags']), **client_metadata(client, project))
            del data['tags']

    if data.get('tags'):
        # remove any values which are over 32 characters
        tags = []
        for pair in data['tags']:
            try:
                k, v = pair
            except ValueError:
                logger.info('Discarded invalid tag value: %r',
                            pair, **client_metadata(client, project))
                continue

            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    logger.info('Discarded invalid tag key: %r',
                                type(k), **client_metadata(client, project))
                    continue

            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    logger.info('Discarded invalid tag value: %s=%r',
                                k, type(v), **client_metadata(client, project))
                    continue
            if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                logger.info('Discarded invalid tag: %s=%s',
                            k, v, **client_metadata(client, project))
                continue
            tags.append((k, v))
        data['tags'] = tags

    for k in data.keys():
        if k in RESERVED_FIELDS:
            continue

        value = data.pop(k)

        if not value:
            logger.info(
                'Ignored empty interface value: %s', k,
                **client_metadata(client, project))
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            logger.info(
                'Ignored unknown attribute: %s', k,
                **client_metadata(client, project))
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception interface supports a list as the
            # value. We should change this in a new protocol version.
            if type(value) in (list, tuple):
                value = {'values': value}
            else:
                logger.info(
                    'Invalid parameters for value: %s', k,
                    type(value), **client_metadata(client, project))
                continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            if isinstance(e, AssertionError):
                log = logger.info
            else:
                log = logger.error
            log('Discarded invalid value for interface: %s', k,
                **client_metadata(client, project, exception=e, extra={'value': value}))

    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data['level'] = LOG_LEVEL_REVERSE_MAP[level]
        except KeyError as e:
            logger.info(
                'Discarded invalid logger value: %s', level,
                **client_metadata(client, project, exception=e))
            data['level'] = LOG_LEVEL_REVERSE_MAP.get(
                DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

    return data
Ejemplo n.º 30
0
def test_get_interface_does_not_let_through_disallowed_name():
    with pytest.raises(ValueError):
        get_interface('subprocess')
Ejemplo n.º 31
0
def validate_data(project, data, client=None):
    # TODO(dcramer): move project out of the data packet
    data["project"] = project.id

    if not data.get("message"):
        data["message"] = "<no message value>"
    elif not isinstance(data["message"], six.string_types):
        raise APIError("Invalid value for message")

    if data.get("culprit"):
        if not isinstance(data["culprit"], six.string_types):
            raise APIError("Invalid value for culprit")

    if not data.get("event_id"):
        data["event_id"] = uuid.uuid4().hex
    elif not isinstance(data["event_id"], six.string_types):
        raise APIError("Invalid value for event_id")
    if len(data["event_id"]) > 32:
        logger.info(
            "Discarded value for event_id due to length (%d chars)",
            len(data["event_id"]),
            **client_metadata(client, project)
        )
        data["event_id"] = uuid.uuid4().hex

    if "timestamp" in data:
        try:
            process_data_timestamp(data)
        except InvalidTimestamp as e:
            # Log the error, remove the timestamp, and continue
            logger.info(
                "Discarded invalid value for timestamp: %r",
                data["timestamp"],
                **client_metadata(client, project, exception=e)
            )
            del data["timestamp"]

    if data.get("modules") and type(data["modules"]) != dict:
        logger.info("Discarded invalid type for modules: %s", type(data["modules"]), **client_metadata(client, project))
        del data["modules"]

    if data.get("extra") is not None and type(data["extra"]) != dict:
        logger.info("Discarded invalid type for extra: %s", type(data["extra"]), **client_metadata(client, project))
        del data["extra"]

    if data.get("tags") is not None:
        if type(data["tags"]) == dict:
            data["tags"] = data["tags"].items()
        elif not isinstance(data["tags"], (list, tuple)):
            logger.info("Discarded invalid type for tags: %s", type(data["tags"]), **client_metadata(client, project))
            del data["tags"]

    if data.get("tags"):
        # remove any values which are over 32 characters
        tags = []
        for pair in data["tags"]:
            try:
                k, v = pair
            except ValueError:
                logger.info("Discarded invalid tag value: %r", pair, **client_metadata(client, project))
                continue

            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    logger.info("Discarded invalid tag key: %r", type(k), **client_metadata(client, project))
                    continue

            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    logger.info("Discarded invalid tag value: %s=%r", k, type(v), **client_metadata(client, project))
                    continue
            if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                logger.info("Discarded invalid tag: %s=%s", k, v, **client_metadata(client, project))
                continue
            tags.append((k, v))
        data["tags"] = tags

    for k in data.keys():
        if k in RESERVED_FIELDS:
            continue

        value = data.pop(k)

        if not value:
            logger.info("Ignored empty interface value: %s", k, **client_metadata(client, project))
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            logger.info("Ignored unknown attribute: %s", k, **client_metadata(client, project))
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception interface supports a list as the
            # value. We should change this in a new protocol version.
            if type(value) in (list, tuple):
                value = {"values": value}
            else:
                logger.info("Invalid parameters for value: %s", k, type(value), **client_metadata(client, project))
                continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            if isinstance(e, AssertionError):
                log = logger.info
            else:
                log = logger.error
            log(
                "Discarded invalid value for interface: %s",
                k,
                **client_metadata(client, project, exception=e, extra={"value": value})
            )

    level = data.get("level") or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data["level"] = LOG_LEVEL_REVERSE_MAP[level]
        except KeyError as e:
            logger.info("Discarded invalid logger value: %s", level, **client_metadata(client, project, exception=e))
            data["level"] = LOG_LEVEL_REVERSE_MAP.get(DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

    return data
Ejemplo n.º 32
0
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get('level'), (six.string_types, int)):
            data['level'] = logging.ERROR
        elif data['level'] not in LOG_LEVELS:
            data['level'] = logging.ERROR

        if not data.get('logger'):
            data['logger'] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data['logger'].strip(), 64)
            if TagKey.is_valid_key(logger):
                data['logger'] = logger
            else:
                data['logger'] = DEFAULT_LOGGER_NAME

        if data.get('platform'):
            data['platform'] = trim(data['platform'], 64)

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp

        if not data.get('event_id'):
            data['event_id'] = uuid4().hex

        data.setdefault('message', '')
        data.setdefault('culprit', None)
        data.setdefault('time_spent', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('errors', [])

        tags = data.get('tags')
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = tags.items()
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data['tags'] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            data['tags'].append((key, value))

        if not isinstance(data['extra'], dict):
            # throw it away
            data['extra'] = {}

        trim_dict(
            data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of validate data needs stuffed into the manager
        for key in data.keys():
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass

        data['version'] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get(
                        'env', {}).get('REMOTE_ADDR'),
                    required=False,
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault('sentry.interfaces.User', {})
                data['sentry.interfaces.User'].setdefault(
                    'ip_address', ip_address)

        if data['time_spent']:
            data['time_spent'] = int(data['time_spent'])

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['message']:
            data['message'] = trim(
                data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)

        return data
Ejemplo n.º 33
0
def test_get_interface_does_not_let_through_disallowed_name():
    with pytest.raises(ValueError):
        get_interface('subprocess')
Ejemplo n.º 34
0
    def validate_data(self, project, data):
        # TODO(dcramer): move project out of the data packet
        data['project'] = project.id

        data['errors'] = []

        if not data.get('message'):
            data['message'] = '<no message value>'
        elif not isinstance(data['message'], six.string_types):
            raise APIForbidden('Invalid value for message')

        if data.get('culprit'):
            if not isinstance(data['culprit'], six.string_types):
                raise APIForbidden('Invalid value for culprit')

        if not data.get('event_id'):
            data['event_id'] = uuid.uuid4().hex
        elif not isinstance(data['event_id'], six.string_types):
            raise APIForbidden('Invalid value for event_id')

        if len(data['event_id']) > 32:
            self.log.info(
                'Discarded value for event_id due to length (%d chars)',
                len(data['event_id']))
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'event_id',
                'value': data['event_id'],
            })
            data['event_id'] = uuid.uuid4().hex

        if 'timestamp' in data:
            try:
                self._process_data_timestamp(data)
            except InvalidTimestamp as e:
                self.log.info(
                    'Discarded invalid value for timestamp: %r',
                    data['timestamp'], exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'timestamp',
                    'value': data['timestamp'],
                })
                del data['timestamp']

        if 'fingerprint' in data:
            try:
                self._process_fingerprint(data)
            except InvalidFingerprint as e:
                self.log.info(
                    'Discarded invalid value for fingerprint: %r',
                    data['fingerprint'], exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'fingerprint',
                    'value': data['fingerprint'],
                })
                del data['fingerprint']

        if 'platform' not in data or data['platform'] not in VALID_PLATFORMS:
            data['platform'] = 'other'

        if data.get('modules') and type(data['modules']) != dict:
            self.log.info(
                'Discarded invalid type for modules: %s',
                type(data['modules']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'modules',
                'value': data['modules'],
            })
            del data['modules']

        if data.get('extra') is not None and type(data['extra']) != dict:
            self.log.info(
                'Discarded invalid type for extra: %s',
                type(data['extra']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'extra',
                'value': data['extra'],
            })
            del data['extra']

        if data.get('tags') is not None:
            if type(data['tags']) == dict:
                data['tags'] = data['tags'].items()
            elif not isinstance(data['tags'], (list, tuple)):
                self.log.info(
                    'Discarded invalid type for tags: %s', type(data['tags']))
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': data['tags'],
                })
                del data['tags']

        if data.get('tags'):
            # remove any values which are over 32 characters
            tags = []
            for pair in data['tags']:
                try:
                    k, v = pair
                except ValueError:
                    self.log.info('Discarded invalid tag value: %r', pair)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                if not isinstance(k, six.string_types):
                    try:
                        k = six.text_type(k)
                    except Exception:
                        self.log.info('Discarded invalid tag key: %r', type(k))
                        data['errors'].append({
                            'type': EventError.INVALID_DATA,
                            'name': 'tags',
                            'value': pair,
                        })
                        continue

                if not isinstance(v, six.string_types):
                    try:
                        v = six.text_type(v)
                    except Exception:
                        self.log.info('Discarded invalid tag value: %s=%r',
                                      k, type(v))
                        data['errors'].append({
                            'type': EventError.INVALID_DATA,
                            'name': 'tags',
                            'value': pair,
                        })
                        continue

                if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                    self.log.info('Discarded invalid tag: %s=%s', k, v)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                # support tags with spaces by converting them
                k = k.replace(' ', '-')

                if TagKey.is_reserved_key(k):
                    self.log.info('Discarding reserved tag key: %s', k)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                if not TagKey.is_valid_key(k):
                    self.log.info('Discarded invalid tag key: %s', k)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                tags.append((k, v))
            data['tags'] = tags

        for k in data.keys():
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.log.info('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.log.info('Ignored unknown attribute: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k,
                })
                continue

            if type(value) != dict:
                # HACK(dcramer): the exception/breadcrumbs interface supports a
                # list as the value. We should change this in a new protocol
                # version.
                if type(value) in (list, tuple):
                    value = {'values': value}
                else:
                    self.log.info(
                        'Invalid parameter for value: %s (%r)', k, type(value))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': k,
                        'value': value,
                    })
                    continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                if isinstance(e, InterfaceValidationError):
                    log = self.log.info
                else:
                    log = self.log.error
                log('Discarded invalid value for interface: %s (%r)', k, value,
                    exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })

        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, six.string_types) and not level.isdigit():
            # assume it's something like 'warning'
            try:
                data['level'] = LOG_LEVEL_REVERSE_MAP[level]
            except KeyError as e:
                self.log.info(
                    'Discarded invalid logger value: %s', level)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'level',
                    'value': level,
                })
                data['level'] = LOG_LEVEL_REVERSE_MAP.get(
                    DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

        if data.get('release'):
            data['release'] = unicode(data['release'])
            if len(data['release']) > 64:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'release',
                    'value': data['release'],
                })
                del data['release']

        return data
Ejemplo n.º 35
0
    def validate_data(self, project, data):
        # TODO(dcramer): move project out of the data packet
        data['project'] = project.id

        data['errors'] = []

        if data.get('culprit'):
            if not isinstance(data['culprit'], six.string_types):
                raise APIForbidden('Invalid value for culprit')

        if not data.get('event_id'):
            data['event_id'] = uuid.uuid4().hex
        elif not isinstance(data['event_id'], six.string_types):
            raise APIForbidden('Invalid value for event_id')

        if len(data['event_id']) > 32:
            self.log.debug(
                'Discarded value for event_id due to length (%d chars)',
                len(data['event_id']))
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'event_id',
                'value': data['event_id'],
            })
            data['event_id'] = uuid.uuid4().hex
        elif not is_event_id(data['event_id']):
            self.log.debug(
                'Discarded invalid value for event_id: %r',
                data['event_id'], exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'event_id',
                'value': data['event_id'],
            })
            data['event_id'] = uuid.uuid4().hex

        if 'timestamp' in data:
            try:
                self._process_data_timestamp(data)
            except InvalidTimestamp as e:
                self.log.debug(
                    'Discarded invalid value for timestamp: %r',
                    data['timestamp'], exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'timestamp',
                    'value': data['timestamp'],
                })
                del data['timestamp']

        if 'fingerprint' in data:
            try:
                self._process_fingerprint(data)
            except InvalidFingerprint as e:
                self.log.debug(
                    'Discarded invalid value for fingerprint: %r',
                    data['fingerprint'], exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'fingerprint',
                    'value': data['fingerprint'],
                })
                del data['fingerprint']

        if 'platform' not in data or data['platform'] not in VALID_PLATFORMS:
            data['platform'] = 'other'

        if data.get('modules') and type(data['modules']) != dict:
            self.log.debug(
                'Discarded invalid type for modules: %s',
                type(data['modules']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'modules',
                'value': data['modules'],
            })
            del data['modules']

        if data.get('extra') is not None and type(data['extra']) != dict:
            self.log.debug(
                'Discarded invalid type for extra: %s',
                type(data['extra']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'extra',
                'value': data['extra'],
            })
            del data['extra']

        if data.get('tags') is not None:
            if type(data['tags']) == dict:
                data['tags'] = list(data['tags'].items())
            elif not isinstance(data['tags'], (list, tuple)):
                self.log.debug(
                    'Discarded invalid type for tags: %s', type(data['tags']))
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': data['tags'],
                })
                del data['tags']

        if data.get('tags'):
            # remove any values which are over 32 characters
            tags = []
            for pair in data['tags']:
                try:
                    k, v = pair
                except ValueError:
                    self.log.debug('Discarded invalid tag value: %r', pair)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                if not isinstance(k, six.string_types):
                    try:
                        k = six.text_type(k)
                    except Exception:
                        self.log.debug('Discarded invalid tag key: %r', type(k))
                        data['errors'].append({
                            'type': EventError.INVALID_DATA,
                            'name': 'tags',
                            'value': pair,
                        })
                        continue

                if not isinstance(v, six.string_types):
                    try:
                        v = six.text_type(v)
                    except Exception:
                        self.log.debug('Discarded invalid tag value: %s=%r',
                                      k, type(v))
                        data['errors'].append({
                            'type': EventError.INVALID_DATA,
                            'name': 'tags',
                            'value': pair,
                        })
                        continue

                if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                    self.log.debug('Discarded invalid tag: %s=%s', k, v)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                # support tags with spaces by converting them
                k = k.replace(' ', '-')

                if TagKey.is_reserved_key(k):
                    self.log.debug('Discarding reserved tag key: %s', k)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                if not TagKey.is_valid_key(k):
                    self.log.debug('Discarded invalid tag key: %s', k)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                if not TagValue.is_valid_value(v):
                    self.log.debug('Discard invalid tag value: %s', v)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                tags.append((k, v))
            data['tags'] = tags

        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.log.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.log.debug('Ignored unknown attribute: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k,
                })
                continue

            if type(value) != dict:
                # HACK(dcramer): the exception/breadcrumbs interface supports a
                # list as the value. We should change this in a new protocol
                # version.
                if type(value) in (list, tuple):
                    value = {'values': value}
                else:
                    self.log.debug(
                        'Invalid parameter for value: %s (%r)', k, type(value))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': k,
                        'value': value,
                    })
                    continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                if isinstance(e, InterfaceValidationError):
                    log = self.log.debug
                else:
                    log = self.log.error
                log('Discarded invalid value for interface: %s (%r)', k, value,
                    exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })

        # TODO(dcramer): ideally this logic would happen in normalize, but today
        # we don't do "validation" there (create errors)

        # message is coerced to an interface, as its used for pure
        # index of searchable strings
        # See GH-3248
        message = data.pop('message', None)
        if message:
            if 'sentry.interfaces.Message' not in data:
                value = {
                    'message': message,
                }
            elif not data['sentry.interfaces.Message'].get('formatted'):
                value = data['sentry.interfaces.Message']
                value['formatted'] = message
            else:
                value = None

            if value is not None:
                k = 'sentry.interfaces.Message'
                interface = get_interface(k)
                try:
                    inst = interface.to_python(value)
                    data[inst.get_path()] = inst.to_json()
                except Exception as e:
                    if isinstance(e, InterfaceValidationError):
                        log = self.log.debug
                    else:
                        log = self.log.error
                    log('Discarded invalid value for interface: %s (%r)', k, value,
                        exc_info=True)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': k,
                        'value': value,
                    })

        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, six.string_types) and not level.isdigit():
            # assume it's something like 'warning'
            try:
                data['level'] = LOG_LEVELS_MAP[level]
            except KeyError as e:
                self.log.debug(
                    'Discarded invalid logger value: %s', level)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'level',
                    'value': level,
                })
                data['level'] = LOG_LEVELS_MAP.get(
                    DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

        if data.get('release'):
            data['release'] = six.text_type(data['release'])
            if len(data['release']) > 64:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'release',
                    'value': data['release'],
                })
                del data['release']

        if data.get('dist'):
            data['dist'] = six.text_type(data['dist']).strip()
            if not data.get('release'):
                data['dist'] = None
            elif len(data['dist']) > 64:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'dist',
                    'value': data['dist'],
                })
                del data['dist']
            elif _dist_re.match(data['dist']) is None:
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'dist',
                    'value': data['dist'],
                })
                del data['dist']

        if data.get('environment'):
            data['environment'] = six.text_type(data['environment'])
            if len(data['environment']) > 64:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'environment',
                    'value': data['environment'],
                })
                del data['environment']

        if data.get('time_spent'):
            try:
                data['time_spent'] = int(data['time_spent'])
            except (ValueError, TypeError):
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'time_spent',
                    'value': data['time_spent'],
                })
                del data['time_spent']
            else:
                if data['time_spent'] > BoundedIntegerField.MAX_VALUE:
                    data['errors'].append({
                        'type': EventError.VALUE_TOO_LONG,
                        'name': 'time_spent',
                        'value': data['time_spent'],
                    })
                    del data['time_spent']

        return data
Ejemplo n.º 36
0
    def normalize(self):
        # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
        # First we pull out our top-level (non-data attr) kwargs
        data = self.data

        if not isinstance(data.get('level'), (six.string_types, int)):
            data['level'] = logging.ERROR
        elif data['level'] not in LOG_LEVELS:
            data['level'] = logging.ERROR

        if not data.get('logger'):
            data['logger'] = DEFAULT_LOGGER_NAME
        else:
            logger = trim(data['logger'].strip(), 64)
            if TagKey.is_valid_key(logger):
                data['logger'] = logger
            else:
                data['logger'] = DEFAULT_LOGGER_NAME

        if data.get('platform'):
            data['platform'] = trim(data['platform'], 64)

        timestamp = data.get('timestamp')
        if not timestamp:
            timestamp = timezone.now()

        if isinstance(timestamp, datetime):
            # We must convert date to local time so Django doesn't mess it up
            # based on TIME_ZONE
            if settings.TIME_ZONE:
                if not timezone.is_aware(timestamp):
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
            elif timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=None)
            timestamp = float(timestamp.strftime('%s'))

        data['timestamp'] = timestamp

        if not data.get('event_id'):
            data['event_id'] = uuid4().hex

        data.setdefault('message', '')
        data.setdefault('culprit', None)
        data.setdefault('time_spent', None)
        data.setdefault('server_name', None)
        data.setdefault('site', None)
        data.setdefault('checksum', None)
        data.setdefault('fingerprint', None)
        data.setdefault('platform', None)
        data.setdefault('environment', None)
        data.setdefault('extra', {})
        data.setdefault('errors', [])

        tags = data.get('tags')
        if not tags:
            tags = []
        # full support for dict syntax
        elif isinstance(tags, dict):
            tags = tags.items()
        # prevent [tag, tag, tag] (invalid) syntax
        elif not all(len(t) == 2 for t in tags):
            tags = []
        else:
            tags = list(tags)

        data['tags'] = []
        for key, value in tags:
            key = six.text_type(key).strip()
            value = six.text_type(value).strip()
            if not (key and value):
                continue

            data['tags'].append((key, value))

        if not isinstance(data['extra'], dict):
            # throw it away
            data['extra'] = {}

        trim_dict(data['extra'],
                  max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

        # TODO(dcramer): more of validate data needs stuffed into the manager
        for key in data.keys():
            if key in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(key)

            try:
                interface = get_interface(key)()
            except ValueError:
                continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass

        data['version'] = self.version

        # TODO(dcramer): find a better place for this logic
        exception = data.get('sentry.interfaces.Exception')
        stacktrace = data.get('sentry.interfaces.Stacktrace')
        if exception and len(exception['values']) == 1 and stacktrace:
            exception['values'][0]['stacktrace'] = stacktrace
            del data['sentry.interfaces.Stacktrace']

        if 'sentry.interfaces.Http' in data:
            try:
                ip_address = validate_ip(
                    data['sentry.interfaces.Http'].get('env',
                                                       {}).get('REMOTE_ADDR'),
                    required=False,
                )
            except ValueError:
                ip_address = None
            if ip_address:
                data.setdefault('sentry.interfaces.User', {})
                data['sentry.interfaces.User'].setdefault(
                    'ip_address', ip_address)

        if data['time_spent']:
            data['time_spent'] = int(data['time_spent'])

        if data['culprit']:
            data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

        if data['message']:
            data['message'] = trim(data['message'],
                                   settings.SENTRY_MAX_MESSAGE_LENGTH)

        return data
Ejemplo n.º 37
0
    def validate_data(self, project, data):
        # TODO(dcramer): move project out of the data packet
        data['project'] = project.id

        data['errors'] = []

        if data.get('culprit'):
            if not isinstance(data['culprit'], six.string_types):
                raise APIForbidden('Invalid value for culprit')

        if not data.get('event_id'):
            data['event_id'] = uuid.uuid4().hex
        elif not isinstance(data['event_id'], six.string_types):
            raise APIForbidden('Invalid value for event_id')

        if len(data['event_id']) > 32:
            self.log.debug(
                'Discarded value for event_id due to length (%d chars)',
                len(data['event_id']))
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'event_id',
                'value': data['event_id'],
            })
            data['event_id'] = uuid.uuid4().hex
        elif not is_event_id(data['event_id']):
            self.log.debug('Discarded invalid value for event_id: %r',
                           data['event_id'],
                           exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'event_id',
                'value': data['event_id'],
            })
            data['event_id'] = uuid.uuid4().hex

        if 'timestamp' in data:
            try:
                self._process_data_timestamp(data)
            except InvalidTimestamp as e:
                self.log.debug('Discarded invalid value for timestamp: %r',
                               data['timestamp'],
                               exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'timestamp',
                    'value': data['timestamp'],
                })
                del data['timestamp']

        if 'fingerprint' in data:
            try:
                self._process_fingerprint(data)
            except InvalidFingerprint as e:
                self.log.debug('Discarded invalid value for fingerprint: %r',
                               data['fingerprint'],
                               exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'fingerprint',
                    'value': data['fingerprint'],
                })
                del data['fingerprint']

        if 'platform' not in data or data['platform'] not in VALID_PLATFORMS:
            data['platform'] = 'other'

        if data.get('modules') and type(data['modules']) != dict:
            self.log.debug('Discarded invalid type for modules: %s',
                           type(data['modules']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'modules',
                'value': data['modules'],
            })
            del data['modules']

        if data.get('extra') is not None and type(data['extra']) != dict:
            self.log.debug('Discarded invalid type for extra: %s',
                           type(data['extra']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'extra',
                'value': data['extra'],
            })
            del data['extra']

        if data.get('tags') is not None:
            if type(data['tags']) == dict:
                data['tags'] = list(data['tags'].items())
            elif not isinstance(data['tags'], (list, tuple)):
                self.log.debug('Discarded invalid type for tags: %s',
                               type(data['tags']))
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': data['tags'],
                })
                del data['tags']

        if data.get('tags'):
            # remove any values which are over 32 characters
            tags = []
            for pair in data['tags']:
                try:
                    k, v = pair
                except ValueError:
                    self.log.debug('Discarded invalid tag value: %r', pair)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                if not isinstance(k, six.string_types):
                    try:
                        k = six.text_type(k)
                    except Exception:
                        self.log.debug('Discarded invalid tag key: %r',
                                       type(k))
                        data['errors'].append({
                            'type': EventError.INVALID_DATA,
                            'name': 'tags',
                            'value': pair,
                        })
                        continue

                if not isinstance(v, six.string_types):
                    try:
                        v = six.text_type(v)
                    except Exception:
                        self.log.debug('Discarded invalid tag value: %s=%r', k,
                                       type(v))
                        data['errors'].append({
                            'type': EventError.INVALID_DATA,
                            'name': 'tags',
                            'value': pair,
                        })
                        continue

                if len(k) > MAX_TAG_KEY_LENGTH or len(
                        v) > MAX_TAG_VALUE_LENGTH:
                    self.log.debug('Discarded invalid tag: %s=%s', k, v)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                # support tags with spaces by converting them
                k = k.replace(' ', '-')

                if tagstore.is_reserved_key(k):
                    self.log.debug('Discarding reserved tag key: %s', k)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                if not tagstore.is_valid_key(k):
                    self.log.debug('Discarded invalid tag key: %s', k)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                if not tagstore.is_valid_value(v):
                    self.log.debug('Discard invalid tag value: %s', v)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

                tags.append((k, v))
            data['tags'] = tags

        for k in list(iter(data)):
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.log.debug('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.log.debug('Ignored unknown attribute: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_ATTRIBUTE,
                    'name': k,
                })
                continue

            if type(value) != dict:
                # HACK(dcramer): the exception/breadcrumbs interface supports a
                # list as the value. We should change this in a new protocol
                # version.
                if type(value) in (list, tuple):
                    value = {'values': value}
                else:
                    self.log.debug('Invalid parameter for value: %s (%r)', k,
                                   type(value))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': k,
                        'value': value,
                    })
                    continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                if isinstance(e, InterfaceValidationError):
                    log = self.log.debug
                else:
                    log = self.log.error
                log('Discarded invalid value for interface: %s (%r)',
                    k,
                    value,
                    exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })

        # TODO(dcramer): ideally this logic would happen in normalize, but today
        # we don't do "validation" there (create errors)

        # message is coerced to an interface, as its used for pure
        # index of searchable strings
        # See GH-3248
        message = data.pop('message', None)
        if message:
            if 'sentry.interfaces.Message' not in data:
                value = {
                    'message': message,
                }
            elif not data['sentry.interfaces.Message'].get('formatted'):
                value = data['sentry.interfaces.Message']
                value['formatted'] = message
            else:
                value = None

            if value is not None:
                k = 'sentry.interfaces.Message'
                interface = get_interface(k)
                try:
                    inst = interface.to_python(value)
                    data[inst.get_path()] = inst.to_json()
                except Exception as e:
                    if isinstance(e, InterfaceValidationError):
                        log = self.log.debug
                    else:
                        log = self.log.error
                    log('Discarded invalid value for interface: %s (%r)',
                        k,
                        value,
                        exc_info=True)
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': k,
                        'value': value,
                    })

        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, six.string_types) and not level.isdigit():
            # assume it's something like 'warning'
            try:
                data['level'] = LOG_LEVELS_MAP[level]
            except KeyError as e:
                self.log.debug('Discarded invalid logger value: %s', level)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'level',
                    'value': level,
                })
                data['level'] = LOG_LEVELS_MAP.get(DEFAULT_LOG_LEVEL,
                                                   DEFAULT_LOG_LEVEL)

        if data.get('release'):
            data['release'] = six.text_type(data['release'])
            if len(data['release']) > 64:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'release',
                    'value': data['release'],
                })
                del data['release']

        if data.get('dist'):
            data['dist'] = six.text_type(data['dist']).strip()
            if not data.get('release'):
                data['dist'] = None
            elif len(data['dist']) > 64:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'dist',
                    'value': data['dist'],
                })
                del data['dist']
            elif _dist_re.match(data['dist']) is None:
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'dist',
                    'value': data['dist'],
                })
                del data['dist']

        if data.get('environment'):
            data['environment'] = six.text_type(data['environment'])
            if len(data['environment']) > 64:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'environment',
                    'value': data['environment'],
                })
                del data['environment']

        if data.get('time_spent'):
            try:
                data['time_spent'] = int(data['time_spent'])
            except (ValueError, TypeError):
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'time_spent',
                    'value': data['time_spent'],
                })
                del data['time_spent']
            else:
                if data['time_spent'] > BoundedIntegerField.MAX_VALUE:
                    data['errors'].append({
                        'type': EventError.VALUE_TOO_LONG,
                        'name': 'time_spent',
                        'value': data['time_spent'],
                    })
                    del data['time_spent']

        return data
Ejemplo n.º 38
0
    def validate_data(self, project, data):
        # TODO(dcramer): move project out of the data packet
        data['project'] = project.id

        if not data.get('message'):
            data['message'] = '<no message value>'
        elif not isinstance(data['message'], six.string_types):
            raise APIForbidden('Invalid value for message')

        if data.get('culprit'):
            if not isinstance(data['culprit'], six.string_types):
                raise APIForbidden('Invalid value for culprit')

        if not data.get('event_id'):
            data['event_id'] = uuid.uuid4().hex
        elif not isinstance(data['event_id'], six.string_types):
            raise APIForbidden('Invalid value for event_id')

        if len(data['event_id']) > 32:
            self.log.info(
                'Discarded value for event_id due to length (%d chars)',
                len(data['event_id']))
            data['event_id'] = uuid.uuid4().hex

        if 'timestamp' in data:
            try:
                self._process_data_timestamp(data)
            except InvalidTimestamp as e:
                self.log.info(
                    'Discarded invalid value for timestamp: %r',
                    data['timestamp'], exc_info=True)
                del data['timestamp']

        if data.get('modules') and type(data['modules']) != dict:
            self.log.info(
                'Discarded invalid type for modules: %s',
                type(data['modules']))
            del data['modules']

        if data.get('extra') is not None and type(data['extra']) != dict:
            self.log.info(
                'Discarded invalid type for extra: %s',
                type(data['extra']))
            del data['extra']

        if data.get('tags') is not None:
            if type(data['tags']) == dict:
                data['tags'] = data['tags'].items()
            elif not isinstance(data['tags'], (list, tuple)):
                self.log.info(
                    'Discarded invalid type for tags: %s', type(data['tags']))
                del data['tags']

        if data.get('tags'):
            # remove any values which are over 32 characters
            tags = []
            for pair in data['tags']:
                try:
                    k, v = pair
                except ValueError:
                    self.log.info('Discarded invalid tag value: %r', pair)
                    continue

                if not isinstance(k, six.string_types):
                    try:
                        k = six.text_type(k)
                    except Exception:
                        self.log.info('Discarded invalid tag key: %r', type(k))
                        continue

                if not isinstance(v, six.string_types):
                    try:
                        v = six.text_type(v)
                    except Exception:
                        self.log.info('Discarded invalid tag value: %s=%r',
                                      k, type(v))
                        continue
                if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                    self.log.info('Discarded invalid tag: %s=%s', k, v)
                    continue
                tags.append((k, v))
            data['tags'] = tags

        for k in data.keys():
            if k in CLIENT_RESERVED_ATTRS:
                continue

            value = data.pop(k)

            if not value:
                self.log.info('Ignored empty interface value: %s', k)
                continue

            try:
                interface = get_interface(k)
            except ValueError:
                self.log.info('Ignored unknown attribute: %s', k)
                continue

            if type(value) != dict:
                # HACK(dcramer): the exception interface supports a list as the
                # value. We should change this in a new protocol version.
                if type(value) in (list, tuple):
                    value = {'values': value}
                else:
                    self.log.info(
                        'Invalid parameter for value: %s (%r)', k, type(value))
                    continue

            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                if isinstance(e, AssertionError):
                    log = self.log.info
                else:
                    log = self.log.error
                log('Discarded invalid value for interface: %s (%r)', k, value,
                    exc_info=True)

        level = data.get('level') or DEFAULT_LOG_LEVEL
        if isinstance(level, six.string_types) and not level.isdigit():
            # assume it's something like 'warning'
            try:
                data['level'] = LOG_LEVEL_REVERSE_MAP[level]
            except KeyError as e:
                self.log.info(
                    'Discarded invalid logger value: %s', level)
                data['level'] = LOG_LEVEL_REVERSE_MAP.get(
                    DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

        if data.get('release'):
            data['release'] = unicode(data['release'])

        return data