Example 1
    def _normalize_impl(self, project_id=None):
        if self._project and project_id and project_id != self._project.id:
            raise RuntimeError(
                "Initialized EventManager with one project ID and called save() with another one"
            )

        if self._normalized:
            raise RuntimeError("Already normalized")

        self._normalized = True

        from sentry_relay.processing import StoreNormalizer

        rust_normalizer = StoreNormalizer(
            project_id=self._project.id if self._project else project_id,
            client_ip=self._client_ip,
            client=self._auth.client if self._auth else None,
            key_id=six.text_type(self._key.id) if self._key else None,
            grouping_config=self._grouping_config,
            protocol_version=six.text_type(self.version) if self.version is not None else None,
            is_renormalize=self._is_renormalize,
            remove_other=self._remove_other,
            normalize_user_agent=True,
            sent_at=self.sent_at.isoformat() if self.sent_at is not None else None,
            **DEFAULT_STORE_NORMALIZER_ARGS
        )

        self._data = CanonicalKeyDict(rust_normalizer.normalize_event(dict(self._data)))
Example 2
    def _normalize_impl(self):
        if self._normalized:
            raise RuntimeError('Already normalized')
        self._normalized = True

        from semaphore.processing import StoreNormalizer
        rust_normalizer = StoreNormalizer(
            geoip_lookup=rust_geoip,
            project_id=self._project.id if self._project else None,
            client_ip=self._client_ip,
            client=self._auth.client if self._auth else None,
            key_id=six.text_type(self._key.id) if self._key else None,
            grouping_config=self._grouping_config,
            protocol_version=six.text_type(self.version) if self.version is not None else None,
            stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT,
            max_stacktrace_frames=settings.SENTRY_MAX_STACKTRACE_FRAMES,
            valid_platforms=list(VALID_PLATFORMS),
            max_secs_in_future=MAX_SECS_IN_FUTURE,
            max_secs_in_past=MAX_SECS_IN_PAST,
            enable_trimming=True,
            is_renormalize=self._is_renormalize)

        self._data = CanonicalKeyDict(
            rust_normalizer.normalize_event(dict(self._data)))

        normalize_user_agent(self._data)
Example 3
    def test_delitem(self):
        d = CanonicalKeyDict({'user': {'id': 'DemoUser'}})
        del d['user']
        assert d == {}

        d = CanonicalKeyDict({'user': {'id': 'DemoUser'}})
        del d['sentry.interfaces.User']
        assert d == {}
Example 4
    def test_delitem(self):
        d = CanonicalKeyDict({"user": {"id": "DemoUser"}})
        del d["user"]
        assert d == {}

        d = CanonicalKeyDict({"user": {"id": "DemoUser"}})
        del d["sentry.interfaces.User"]
        assert d == {}
Example 5
    def test_getitem_setitem(self):
        d = CanonicalKeyDict({"user": {"id": "DemoUser"}}, legacy=True)
        d["user"] = {"id": "other"}
        assert d["user"] == {"id": "other"}
        assert d["sentry.interfaces.User"] == {"id": "other"}

        d = CanonicalKeyDict({"user": {"id": "DemoUser"}}, legacy=True)
        d["sentry.interfaces.User"] = {"id": "other"}
        assert d["user"] == {"id": "other"}
        assert d["sentry.interfaces.User"] == {"id": "other"}
Example 6
    def __init__(self, data, skip_renormalization=False, **kwargs):
        is_renormalized = isinstance(data, EventDict) or (
            isinstance(data, NodeData) and isinstance(data.data, EventDict)
        )

        if not skip_renormalization and not is_renormalized:
            normalizer = StoreNormalizer(is_renormalize=True, enable_trimming=False)
            data = normalizer.normalize_event(dict(data))

        CanonicalKeyDict.__init__(self, data, **kwargs)
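
This constructor (most likely EventDict.__init__, given the isinstance checks) skips the renormalization pass when the data is already an EventDict, or when the caller asks for the skip explicitly. The explicit form shows up later in Example 36, where EventManager builds the Event after having normalized the data itself; a sketch of that call:

# As used in EventManager._get_event_instance (Example 36 below), where the
# payload has already gone through EventManager.normalize():
data = EventDict(data, skip_renormalization=True)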
Example 7
    def test_getitem_setitem(self):
        d = CanonicalKeyDict({'user': {'id': 'DemoUser'}}, legacy=True)
        d['user'] = {'id': 'other'}
        assert d['user'] == {'id': 'other'}
        assert d['sentry.interfaces.User'] == {'id': 'other'}

        d = CanonicalKeyDict({'user': {'id': 'DemoUser'}}, legacy=True)
        d['sentry.interfaces.User'] = {'id': 'other'}
        assert d['user'] == {'id': 'other'}
        assert d['sentry.interfaces.User'] == {'id': 'other'}
Example 8
    def __init__(self, data, **kwargs):
        rust_renormalized = _should_skip_to_python(data.get('event_id'))
        if rust_renormalized:
            normalizer = StoreNormalizer(is_renormalize=True)
            data = normalizer.normalize_event(dict(data))

        metrics.incr('rust.renormalized', tags={'value': rust_renormalized})

        with configure_scope() as scope:
            scope.set_tag("rust.renormalized", rust_renormalized)

        CanonicalKeyDict.__init__(self, data, **kwargs)
Example 9
    def create_event(group=None,
                     project=None,
                     event_id=None,
                     normalize=True,
                     **kwargs):
        # XXX: Do not use this method for new tests! Prefer `store_event`.
        if event_id is None:
            event_id = uuid4().hex
        kwargs.setdefault('project', project if project else group.project)
        kwargs.setdefault('data', copy.deepcopy(DEFAULT_EVENT_DATA))
        kwargs.setdefault('platform', kwargs['data'].get('platform', 'python'))
        kwargs.setdefault('message', kwargs['data'].get('message', 'message'))
        if kwargs.get('tags'):
            tags = kwargs.pop('tags')
            if isinstance(tags, dict):
                tags = list(tags.items())
            kwargs['data']['tags'] = tags
        if kwargs.get('stacktrace'):
            stacktrace = kwargs.pop('stacktrace')
            kwargs['data']['stacktrace'] = stacktrace

        user = kwargs.pop('user', None)
        if user is not None:
            kwargs['data']['user'] = user

        kwargs['data'].setdefault('errors', [{
            'type': EventError.INVALID_DATA,
            'name': 'foobar',
        }])

        # maintain simple event Factories by supporting the legacy message
        # parameter just like our API would
        if 'logentry' not in kwargs['data']:
            kwargs['data']['logentry'] = {
                'message': kwargs['message'] or '<unlabeled event>',
            }

        if normalize:
            manager = EventManager(CanonicalKeyDict(kwargs['data']))
            manager.normalize()
            kwargs['data'] = manager.get_data()
            kwargs['data'].update(manager.materialize_metadata())
            kwargs['message'] = manager.get_search_message()

        # This is needed so that create_event saves the event in nodestore
        # under the correct key. This is usually done in EventManager.save()
        kwargs['data'].setdefault(
            'node_id', Event.generate_node_id(kwargs['project'].id, event_id))

        event = Event(event_id=event_id, group=group, **kwargs)
        if group:
            EventMapping.objects.create(
                project_id=event.project.id,
                event_id=event_id,
                group=group,
            )
        # emulate EventManager refs
        event.data.bind_ref(event)
        event.save()
        return event
Example 10
def _do_preprocess_event(cache_key, data, start_time, event_id, process_event):
    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        error_logger.error('preprocess.failed.empty',
                           extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    if should_process(data):
        process_event.delay(cache_key=cache_key,
                            start_time=start_time,
                            event_id=event_id)
        return

    # If we get here, the event needed no preprocessing, so we can jump
    # directly to save_event
    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key,
                     data=data,
                     start_time=start_time,
                     event_id=event_id,
                     project_id=project)
Example 11
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task):
    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'pre'
                     },
                     skip_internal=False)
        error_logger.error('preprocess.failed.empty',
                           extra={'cache_key': cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    project = Project.objects.get_from_cache(id=project_id)

    if should_process(data):
        from_reprocessing = process_task is process_event_from_reprocessing
        submit_process(project, from_reprocessing, cache_key, event_id,
                       start_time, original_data)
        return

    submit_save_event(project, cache_key, event_id, start_time, original_data)
Example 12
 def test_get_path_dict(self):
     assert get_path({}, 'a') is None
     assert get_path({'a': 2}, 'a') == 2
     assert get_path({'a': 2}, 'b') is None
     assert get_path({'a': {'b': []}}, 'a', 'b') == []
     assert get_path({'a': []}, 'a', 'b') is None
     assert get_path(CanonicalKeyDict({'a': 2}), 'a') == 2
Example 13
    def _normalize_impl(self):
        if self._normalized:
            raise RuntimeError('Already normalized')
        self._normalized = True

        from semaphore.processing import StoreNormalizer
        rust_normalizer = StoreNormalizer(
            geoip_lookup=rust_geoip,
            project_id=self._project.id if self._project else None,
            client_ip=self._client_ip,
            client=self._auth.client if self._auth else None,
            key_id=six.text_type(self._key.id) if self._key else None,
            grouping_config=self._grouping_config,
            protocol_version=six.text_type(self.version) if self.version is not None else None,
            stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT,
            max_stacktrace_frames=settings.SENTRY_MAX_STACKTRACE_FRAMES,
            valid_platforms=list(VALID_PLATFORMS),
            max_secs_in_future=MAX_SECS_IN_FUTURE,
            max_secs_in_past=MAX_SECS_IN_PAST,
            enable_trimming=True,
            is_renormalize=self._is_renormalize,
            remove_other=self._remove_other,
        )

        self._data = CanonicalKeyDict(
            rust_normalizer.normalize_event(dict(self._data))
        )

        normalize_user_agent(self._data)
Example 14
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task,
                         project):
    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "pre"
                     },
                     skip_internal=False)
        error_logger.error("preprocess.failed.empty",
                           extra={"cache_key": cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data["project"]

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    if project is None:
        project = Project.objects.get_from_cache(id=project_id)
    else:
        assert project.id == project_id, (project.id, project_id)

    if should_process(data):
        from_reprocessing = process_task is process_event_from_reprocessing
        submit_process(project, from_reprocessing, cache_key, event_id,
                       start_time, original_data)
        return

    submit_save_event(project, cache_key, event_id, start_time, original_data)
Example 15
 def test_get_path_dict(self):
     assert get_path({}, "a") is None
     assert get_path({"a": 2}, "a") == 2
     assert get_path({"a": 2}, "b") is None
     assert get_path({"a": {"b": []}}, "a", "b") == []
     assert get_path({"a": []}, "a", "b") is None
     assert get_path(CanonicalKeyDict({"a": 2}), "a") == 2
Example 16
File: node.py Project: webZW/sentry
 def __setstate__(self, state):
     # If there is a legacy pickled version that used to have data as a
     # duplicate, reject it.
     state.pop('data', None)
     if state.pop('_node_data_CANONICAL', False):
         state['_node_data'] = CanonicalKeyDict(state['_node_data'])
     self.__dict__ = state
Example 17
 def test_mixed(self):
     assert CanonicalKeyDict({
         'release': 'asdf',
         'exception': {'type': 'DemoException'},
         'user': {'id': 'DemoUser'},
         'sentry.interfaces.Exception': {'type': 'INVALID'},
         'sentry.interfaces.User': {'id': 'INVALID'},
     }) == self.canonical_data
Example 18
 def test_len(self):
     assert len(CanonicalKeyDict({
         'release': 'asdf',
         'exception': {'type': 'DemoException'},
         'user': {'id': 'DemoUser'},
         'sentry.interfaces.Exception': {'type': 'INVALID'},
         'sentry.interfaces.User': {'id': 'INVALID'},
     })) == 3
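
The length of 3 follows from the same aliasing shown in the earlier tests: the two legacy sentry.interfaces.* spellings collapse onto keys that already exist, and (per test_mixed above) the canonical value wins, leaving only release, exception and user. A quick hedged check along the same lines, assuming CanonicalKeyDict is imported from sentry.utils.canonical as in these tests:

from sentry.utils.canonical import CanonicalKeyDict  # assumed import path

d = CanonicalKeyDict({
    'release': 'asdf',
    'user': {'id': 'DemoUser'},
    'sentry.interfaces.User': {'id': 'INVALID'},
})
assert len(d) == 2                      # the legacy spelling collapses onto 'user'
assert d['user'] == {'id': 'DemoUser'}  # canonical value wins over the legacy one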
Example 19
 def test_get_path(self):
     assert get_path({}, ['a']) is None
     assert get_path({}, ['a'], 1) == 1
     assert get_path({'a': 2}, ['a']) == 2
     assert get_path({'a': 2}, ['b']) is None
     assert get_path({'a': 2}, ['b'], 1) == 1
     assert get_path({'a': {'b': []}}, ['a', 'b']) == []
     assert get_path({'a': []}, ['a', 'b']) is None
     assert get_path(CanonicalKeyDict({'a': 2}), ['a']) == 2
Example 20
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task,
                         project):
    from sentry.lang.native.processing import should_process_with_symbolicator

    if cache_key and data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "pre"
                     },
                     skip_internal=False)
        error_logger.error("preprocess.failed.empty",
                           extra={"cache_key": cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data["project"]
    set_current_project(project_id)

    if project is None:
        project = Project.objects.get_from_cache(id=project_id)
    else:
        assert project.id == project_id, (project.id, project_id)

    from_reprocessing = process_task is process_event_from_reprocessing

    with metrics.timer(
            "tasks.store.preprocess_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

    if should_process_with_symbolicator(data):
        reprocessing2.backup_unprocessed_event(project=project,
                                               data=original_data)
        submit_symbolicate(project, from_reprocessing, cache_key, event_id,
                           start_time, original_data)
        return

    if should_process(data):
        reprocessing2.backup_unprocessed_event(project=project,
                                               data=original_data)
        submit_process(
            project,
            from_reprocessing,
            cache_key,
            event_id,
            start_time,
            data_has_changed=False,
        )
        return

    submit_save_event(project, from_reprocessing, cache_key, event_id,
                      start_time, original_data)
Example 21
    def create_event(self, event_id=None, normalize=True, **kwargs):
        if event_id is None:
            event_id = uuid4().hex
        if 'group' not in kwargs:
            kwargs['group'] = self.group
        kwargs.setdefault('project', kwargs['group'].project)
        kwargs.setdefault('data', copy.deepcopy(DEFAULT_EVENT_DATA))
        kwargs.setdefault('platform', kwargs['data'].get('platform', 'python'))
        kwargs.setdefault('message', kwargs['data'].get('message', 'message'))
        if kwargs.get('tags'):
            tags = kwargs.pop('tags')
            if isinstance(tags, dict):
                tags = list(tags.items())
            kwargs['data']['tags'] = tags
        if kwargs.get('stacktrace'):
            stacktrace = kwargs.pop('stacktrace')
            kwargs['data']['stacktrace'] = stacktrace

        user = kwargs.pop('user', None)
        if user is not None:
            kwargs['data']['user'] = user

        kwargs['data'].setdefault(
            'errors', [{
                'type': EventError.INVALID_DATA,
                'name': 'foobar',
            }]
        )

        # maintain simple event fixtures by supporting the legacy message
        # parameter just like our API would
        if 'logentry' not in kwargs['data']:
            kwargs['data']['logentry'] = {
                'message': kwargs.get('message') or '<unlabeled event>',
            }

        if normalize:
            manager = EventManager(CanonicalKeyDict(kwargs['data']),
                                   for_store=False)
            manager.normalize()
            kwargs['data'] = manager.get_data()
            kwargs['message'] = manager.get_search_message()

        else:
            assert 'message' not in kwargs, 'do not pass message this way'

        event = Event(event_id=event_id, **kwargs)
        EventMapping.objects.create(
            project_id=event.project.id,
            event_id=event_id,
            group=event.group,
        )
        # emulate EventManager refs
        event.data.bind_ref(event)
        event.save()
        return event
Example 22
 def test_canonical(self):
     assert CanonicalKeyDict({
         'release': 'asdf',
         'exception': {
             'type': 'DemoException'
         },
         'user': {
             'id': 'DemoUser'
         },
     }) == self.canonical_data
Example 23
 def test_canonical(self):
     assert (CanonicalKeyDict({
         "release": "asdf",
         "exception": {
             "type": "DemoException"
         },
         "user": {
             "id": "DemoUser"
         },
     }) == self.canonical_data)
Example 24
 def test_legacy(self):
     assert (CanonicalKeyDict({
         "release": "asdf",
         "sentry.interfaces.Exception": {
             "type": "DemoException"
         },
         "sentry.interfaces.User": {
             "id": "DemoUser"
         },
     }) == self.canonical_data)
Example 25
    def create_event(group=None,
                     project=None,
                     event_id=None,
                     normalize=True,
                     **kwargs):
        # XXX: Do not use this method for new tests! Prefer `store_event`.
        if event_id is None:
            event_id = uuid4().hex
        kwargs.setdefault("project", project if project else group.project)
        kwargs.setdefault("data", copy.deepcopy(DEFAULT_EVENT_DATA))
        kwargs.setdefault("platform", kwargs["data"].get("platform", "python"))
        kwargs.setdefault("message", kwargs["data"].get("message", "message"))
        if kwargs.get("tags"):
            tags = kwargs.pop("tags")
            if isinstance(tags, dict):
                tags = list(tags.items())
            kwargs["data"]["tags"] = tags
        if kwargs.get("stacktrace"):
            stacktrace = kwargs.pop("stacktrace")
            kwargs["data"]["stacktrace"] = stacktrace

        user = kwargs.pop("user", None)
        if user is not None:
            kwargs["data"]["user"] = user

        kwargs["data"].setdefault("errors", [{
            "type": EventError.INVALID_DATA,
            "name": "foobar"
        }])

        # maintain simple event Factories by supporting the legacy message
        # parameter just like our API would
        if "logentry" not in kwargs["data"]:
            kwargs["data"]["logentry"] = {
                "message": kwargs["message"] or "<unlabeled event>"
            }

        if normalize:
            manager = EventManager(CanonicalKeyDict(kwargs["data"]))
            manager.normalize()
            kwargs["data"] = manager.get_data()
            kwargs["data"].update(manager.materialize_metadata())
            kwargs["message"] = manager.get_search_message()

        # This is needed so that create_event saves the event in nodestore
        # under the correct key. This is usually done in EventManager.save()
        kwargs["data"].setdefault(
            "node_id", Event.generate_node_id(kwargs["project"].id, event_id))

        event = Event(event_id=event_id, group=group, **kwargs)
        # emulate EventManager refs
        event.data.bind_ref(event)
        event.save()
        event.data.save()
        return event
Example 26
def _do_preprocess_event(cache_key, data, start_time, event_id, process_task,
                         project):
    from sentry.lang.native.processing import should_process_with_symbolicator

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "pre"
                     },
                     skip_internal=False)
        error_logger.error("preprocess.failed.empty",
                           extra={"cache_key": cache_key})
        return

    original_data = data
    data = CanonicalKeyDict(data)
    project_id = data["project"]
    set_current_project(project_id)

    if project is None:
        project = Project.objects.get_from_cache(id=project_id)
    else:
        assert project.id == project_id, (project.id, project_id)

    from_reprocessing = process_task is process_event_from_reprocessing

    new_process_behavior = bool(
        options.get("sentry:preprocess-use-new-behavior", False))
    metrics.incr("tasks.store.preprocess_event.new_process_behavior",
                 tags={"value": new_process_behavior})

    if new_process_behavior and should_process_with_symbolicator(data):
        submit_symbolicate(project, from_reprocessing, cache_key, event_id,
                           start_time, original_data)
        return

    if should_process(data):
        submit_process(
            project,
            from_reprocessing,
            cache_key,
            event_id,
            start_time,
            original_data,
            data_has_changed=False,
            new_process_behavior=new_process_behavior,
        )
        return

    submit_save_event(project, cache_key, event_id, start_time, original_data)
Example 27
 def test_legacy(self):
     assert CanonicalKeyDict(
         {
             'release': 'asdf',
             'sentry.interfaces.Exception': {
                 'type': 'DemoException'
             },
             'sentry.interfaces.User': {
                 'id': 'DemoUser'
             },
         },
         legacy=True) == self.canonical_data
Example 28
    def create_event(self, event_id=None, **kwargs):
        if event_id is None:
            event_id = uuid4().hex
        if 'group' not in kwargs:
            kwargs['group'] = self.group
        kwargs.setdefault('project', kwargs['group'].project)
        kwargs.setdefault('data', copy.deepcopy(DEFAULT_EVENT_DATA))
        kwargs.setdefault('platform', kwargs['data'].get('platform', 'python'))
        kwargs.setdefault('message', kwargs['data'].get('message', 'message'))
        if kwargs.get('tags'):
            tags = kwargs.pop('tags')
            if isinstance(tags, dict):
                tags = list(tags.items())
            kwargs['data']['tags'] = tags
        if kwargs.get('stacktrace'):
            stacktrace = kwargs.pop('stacktrace')
            kwargs['data']['sentry.interfaces.Stacktrace'] = stacktrace

        kwargs['data'].setdefault(
            'errors', [{
                'type': EventError.INVALID_DATA,
                'name': 'foobar',
            }]
        )

        # maintain simple event fixtures by supporting the legacy message
        # parameter just like our API would
        if 'sentry.interfaces.Message' not in kwargs['data']:
            kwargs['data']['sentry.interfaces.Message'] = {
                'message': kwargs.get('message') or '<unlabeled event>',
            }

        if 'type' not in kwargs['data']:
            kwargs['data'].update(
                {
                    'type': 'default',
                    'metadata': {
                        'title': kwargs['data']['sentry.interfaces.Message']['message'],
                    },
                }
            )

        kwargs['data'] = CanonicalKeyDict(kwargs.pop('data'))
        event = Event(event_id=event_id, **kwargs)
        EventMapping.objects.create(
            project_id=event.project.id,
            event_id=event_id,
            group=event.group,
        )
        # emulate EventManager refs
        event.data.bind_ref(event)
        event.save()
        return event
Example 29
    def _normalize_impl(self):
        if self._normalized:
            raise RuntimeError("Already normalized")
        self._normalized = True

        from semaphore.processing import StoreNormalizer

        rust_normalizer = StoreNormalizer(
            project_id=self._project.id if self._project else None,
            client_ip=self._client_ip,
            client=self._auth.client if self._auth else None,
            key_id=six.text_type(self._key.id) if self._key else None,
            grouping_config=self._grouping_config,
            protocol_version=six.text_type(self.version) if self.version is not None else None,
            is_renormalize=self._is_renormalize,
            remove_other=self._remove_other,
            normalize_user_agent=True,
            **DEFAULT_STORE_NORMALIZER_ARGS
        )

        self._data = CanonicalKeyDict(rust_normalizer.normalize_event(dict(self._data)))
Example 30
def _decode_event(data, content_encoding):
    if isinstance(data, six.binary_type):
        if content_encoding == "gzip":
            data = decompress_gzip(data)
        elif content_encoding == "deflate":
            data = decompress_deflate(data)
        elif data[0] != b"{":
            data = decode_and_decompress_data(data)
        else:
            data = decode_data(data)
    if isinstance(data, six.text_type):
        data = safely_load_json_string(data)

    return CanonicalKeyDict(data)
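
_decode_event accepts raw bytes or text and always returns a CanonicalKeyDict. The helpers it calls (decompress_gzip, decompress_deflate, decode_and_decompress_data, safely_load_json_string) are Sentry utilities; the standalone sketch below only approximates their effect with the standard library, to make the branching easier to follow.

import gzip
import json
import zlib

def decode_event_sketch(data, content_encoding=None):
    # Rough stand-in for the branches above (illustrative only).
    if isinstance(data, bytes):
        if content_encoding == 'gzip':
            data = gzip.decompress(data)
        elif content_encoding == 'deflate':
            data = zlib.decompress(data)
        elif not data.startswith(b'{'):
            # The original falls back to decode_and_decompress_data here;
            # that path is out of scope for this sketch.
            raise ValueError('unsupported payload for this sketch')
        data = data.decode('utf-8')
    if isinstance(data, str):
        data = json.loads(data)
    return data

payload = gzip.compress(b'{"event_id": "abc123"}')
assert decode_event_sketch(payload, 'gzip') == {'event_id': 'abc123'}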
Example 31
    def __init__(self, data, skip_renormalization=False, **kwargs):
        is_renormalized = (isinstance(data, EventDict)
                           or (isinstance(data, NodeData)
                               and isinstance(data.data, EventDict)))

        with configure_scope() as scope:
            scope.set_tag("rust.is_renormalized", is_renormalized)
            scope.set_tag("rust.skip_renormalization", skip_renormalization)
            scope.set_tag("rust.renormalized", "null")

        if not skip_renormalization and not is_renormalized:
            rust_renormalized = _should_skip_to_python(data.get('event_id'))
            if rust_renormalized:
                normalizer = StoreNormalizer(is_renormalize=True)
                data = normalizer.normalize_event(dict(data))

            metrics.incr('rust.renormalized',
                         tags={'value': rust_renormalized})

            with configure_scope() as scope:
                scope.set_tag("rust.renormalized", rust_renormalized)

        CanonicalKeyDict.__init__(self, data, **kwargs)
Example 32
    def __init__(self, data, skip_renormalization=False, **kwargs):
        is_renormalized = (
            isinstance(data, EventDict) or
            (isinstance(data, NodeData) and isinstance(data.data, EventDict))
        )

        with configure_scope() as scope:
            scope.set_tag("rust.is_renormalized", is_renormalized)
            scope.set_tag("rust.skip_renormalization", skip_renormalization)
            scope.set_tag("rust.renormalized", "null")

        if not skip_renormalization and not is_renormalized:
            rust_renormalized = _should_skip_to_python(data.get('event_id'))
            if rust_renormalized:
                normalizer = StoreNormalizer(is_renormalize=True)
                data = normalizer.normalize_event(dict(data))

            metrics.incr('rust.renormalized',
                         tags={'value': rust_renormalized})

            with configure_scope() as scope:
                scope.set_tag("rust.renormalized", rust_renormalized)

        CanonicalKeyDict.__init__(self, data, **kwargs)
Example 33
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']
    Raven.tags_context({
        'project': project,
    })
    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Stacktrace based event processors.  These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        # We cannot persist canonical types in the cache, so we need to
        # downgrade this.
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())
        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key, data=None, start_time=start_time, event_id=event_id,
        project_id=project
    )
Example 34
def save_event(cache_key=None, data=None, start_time=None, event_id=None,
               project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        event = manager.save(project_id)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted, project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append((tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time) if start_time is not None else None,
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)
            attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
Example 35
def load_data(platform, default=None, sample_name=None):
    # NOTE: Before editing this data, make sure you understand the context
    # in which its being used. It is NOT only used for local development and
    # has production consequences.
    #   * bin/load-mocks to generate fake data for local testing
    #   * When a new project is created, a fake event is generated as a "starter"
    #     event so it's not an empty project.
    #   * When a user clicks Test Configuration from notification plugin settings page,
    #     a fake event is generated to go through the pipeline.
    data = None
    language = None
    platform_data = INTEGRATION_ID_TO_PLATFORM_DATA.get(platform)

    if platform_data is not None and platform_data['type'] != 'language':
        language = platform_data['language']

    for platform in (platform, language, default):
        if not platform:
            continue

        json_path = os.path.join(DATA_ROOT, 'samples', '%s.json' % (platform.encode('utf-8'), ))
        if not os.path.exists(json_path):
            continue

        if not sample_name:
            try:
                sample_name = INTEGRATION_ID_TO_PLATFORM_DATA[platform]['name']
            except KeyError:
                pass

        with open(json_path) as fp:
            data = json.loads(fp.read())
            break

    if data is None:
        return

    data = CanonicalKeyDict(data)
    if platform in ('csp', 'hkpk', 'expectct', 'expectstaple'):
        return data

    data['platform'] = platform
    # XXX: Message is a legacy alias for logentry. Do not overwrite if set.
    if 'message' not in data:
        data['message'] = 'This is an example %s exception' % (sample_name or platform, )
    data.setdefault('user', generate_user(
        ip_address='127.0.0.1',
        username='******',
        id=1,
        email='*****@*****.**',
    ))
    data.setdefault('extra', {
        'session': {
            'foo': 'bar',
        },
        'results': [1, 2, 3, 4, 5],
        'emptyList': [],
        'emptyMap': {},
        'length': 10837790,
        'unauthorized': False,
        'url': 'http://example.org/foo/bar/',
    })
    data.setdefault('modules', {
        'my.package': '1.0.0',
    })
    data.setdefault('request', {
        "cookies": 'foo=bar;biz=baz',
        "url": "http://example.com/foo",
        "headers": {
            "Referer":
            "http://example.com",
            "Content-Type":
            "application/json",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36"
        },
        "env": {
            'ENV': 'prod',
        },
        "query_string": "foo=bar",
        "data": '{"hello": "world"}',
        "method": "GET"
    })

    return data
Example 36
class EventManager(object):
    """
    Handles normalization in both the store endpoint and the save task. The
    intention is to swap this class out with a reimplementation in Rust.
    """

    def __init__(
        self,
        data,
        version='5',
        project=None,
        grouping_config=None,
        client_ip=None,
        user_agent=None,
        auth=None,
        key=None,
        content_encoding=None,
        is_renormalize=False,
        remove_other=None
    ):
        self._data = _decode_event(data, content_encoding=content_encoding)
        self.version = version
        self._project = project
        if grouping_config is None and project is not None:
            grouping_config = get_grouping_config_dict_for_project(self._project)
        self._grouping_config = grouping_config
        self._client_ip = client_ip
        self._user_agent = user_agent
        self._auth = auth
        self._key = key
        self._is_renormalize = is_renormalize
        self._remove_other = remove_other
        self._normalized = False

    def process_csp_report(self):
        """Only called from the CSP report endpoint."""
        data = self._data

        try:
            interface = get_interface(data.pop('interface'))
            report = data.pop('report')
        except KeyError:
            raise APIForbidden('No report or interface data')

        # To support testing, we can either accept a built interface instance, or the raw data in
        # which case we build the instance ourselves
        try:
            instance = (
                report if isinstance(report, interface) else interface.from_raw(report)
            )
        except jsonschema.ValidationError as e:
            raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

        def clean(d):
            return dict(filter(lambda x: x[1], d.items()))

        data.update(
            {
                'logger': 'csp',
                'message': instance.get_message(),
                'culprit': instance.get_culprit(),
                instance.path: instance.to_json(),
                'tags': instance.get_tags(),
                'errors': [],
                'user': {'ip_address': self._client_ip},
                # Construct a faux Http interface based on the little information we have
                # This is a bit weird, since we don't have nearly enough
                # information to create an Http interface, but
                # this automatically will pick up tags for the User-Agent
                # which is actually important here for CSP
                'request': {
                    'url': instance.get_origin(),
                    'headers': clean(
                        {
                            'User-Agent': self._user_agent,
                            'Referer': instance.get_referrer(),
                        }
                    ),
                },
            }
        )

        self._data = data

    def normalize(self):
        with metrics.timer('events.store.normalize.duration'):
            self._normalize_impl()

        metrics.timing(
            'events.store.normalize.errors',
            len(self._data.get("errors") or ()),
        )

    def _normalize_impl(self):
        if self._normalized:
            raise RuntimeError('Already normalized')
        self._normalized = True

        from semaphore.processing import StoreNormalizer
        rust_normalizer = StoreNormalizer(
            geoip_lookup=rust_geoip,
            project_id=self._project.id if self._project else None,
            client_ip=self._client_ip,
            client=self._auth.client if self._auth else None,
            key_id=six.text_type(self._key.id) if self._key else None,
            grouping_config=self._grouping_config,
            protocol_version=six.text_type(self.version) if self.version is not None else None,
            stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT,
            max_stacktrace_frames=settings.SENTRY_MAX_STACKTRACE_FRAMES,
            valid_platforms=list(VALID_PLATFORMS),
            max_secs_in_future=MAX_SECS_IN_FUTURE,
            max_secs_in_past=MAX_SECS_IN_PAST,
            enable_trimming=True,
            is_renormalize=self._is_renormalize,
            remove_other=self._remove_other,
        )

        self._data = CanonicalKeyDict(
            rust_normalizer.normalize_event(dict(self._data))
        )

        normalize_user_agent(self._data)

    def should_filter(self):
        '''
        returns (result: bool, reason: string or None)
        Result is True if an event should be filtered
        The reason for filtering is passed along as a string
        so that we can store it in metrics
        '''
        for name in SECURITY_REPORT_INTERFACES:
            if name in self._data:
                interface = get_interface(name)
                if interface.to_python(self._data[name]).should_filter(self._project):
                    return (True, FilterStatKeys.INVALID_CSP)

        if self._client_ip and not is_valid_ip(self._project, self._client_ip):
            return (True, FilterStatKeys.IP_ADDRESS)

        release = self._data.get('release')
        if release and not is_valid_release(self._project, release):
            return (True, FilterStatKeys.RELEASE_VERSION)

        error_message = get_path(self._data, 'logentry', 'formatted') \
            or get_path(self._data, 'logentry', 'message') \
            or ''
        if error_message and not is_valid_error_message(self._project, error_message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

        for exc in get_path(self._data, 'exception', 'values', filter=True, default=[]):
            message = u': '.join(
                filter(None, map(exc.get, ['type', 'value']))
            )
            if message and not is_valid_error_message(self._project, message):
                return (True, FilterStatKeys.ERROR_MESSAGE)

        for filter_cls in filters.all():
            filter_obj = filter_cls(self._project)
            if filter_obj.is_enabled() and filter_obj.test(self._data):
                return (True, six.text_type(filter_obj.id))

        return (False, None)

    def get_data(self):
        return self._data

    def _get_event_instance(self, project_id=None):
        data = self._data
        event_id = data.get('event_id')
        platform = data.get('platform')

        recorded_timestamp = data.get('timestamp')
        date = datetime.fromtimestamp(recorded_timestamp)
        date = date.replace(tzinfo=timezone.utc)
        time_spent = data.get('time_spent')

        data['node_id'] = Event.generate_node_id(project_id, event_id)

        return Event(
            project_id=project_id or self._project.id,
            event_id=event_id,
            data=EventDict(data, skip_renormalization=True),
            time_spent=time_spent,
            datetime=date,
            platform=platform
        )

    def get_culprit(self):
        """Helper to calculate the default culprit"""
        return force_text(
            self._data.get('culprit') or
            self._data.get('transaction') or
            generate_culprit(self._data) or
            ''
        )

    def get_event_type(self):
        """Returns the event type."""
        return eventtypes.get(self._data.get('type', 'default'))()

    def materialize_metadata(self):
        """Returns the materialized metadata to be merged with group or
        event data.  This currently produces the keys `type`, `metadata`,
        `title` and `location`.  This should most likely also produce
        `culprit` here.
        """
        event_type = self.get_event_type()
        event_metadata = event_type.get_metadata(self._data)
        return {
            'type': event_type.key,
            'metadata': event_metadata,
            'title': event_type.get_title(event_metadata),
            'location': event_type.get_location(event_metadata),
        }

    def get_search_message(self, event_metadata=None, culprit=None):
        """This generates the internal event.message attribute which is used
        for search purposes.  It adds a bunch of data from the metadata and
        the culprit.
        """
        if event_metadata is None:
            event_metadata = self.get_event_type().get_metadata(self._data)
        if culprit is None:
            culprit = self.get_culprit()

        data = self._data
        message = ''

        if data.get('logentry'):
            message += (data['logentry'].get('formatted') or
                        data['logentry'].get('message') or '')

        if event_metadata:
            for value in six.itervalues(event_metadata):
                value_u = force_text(value, errors='replace')
                if value_u not in message:
                    message = u'{} {}'.format(message, value_u)

        if culprit and culprit not in message:
            culprit_u = force_text(culprit, errors='replace')
            message = u'{} {}'.format(message, culprit_u)

        return trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

    def save(self, project_id, raw=False, assume_normalized=False):
        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            # Make sure we cache on the project before returning
            event._project_cache = project
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': data['event_id'],
                    'project_id': project.id,
                    'model': Event.__name__,
                }
            )
            return event

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get('level')

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get('transaction')
        logger_name = data.get('logger')
        release = data.get('release')
        dist = data.get('dist')
        environment = data.get('environment')
        recorded_timestamp = data.get('timestamp')

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, 'tags', value=[])
        set_tag(data, 'level', level)
        if logger_name:
            set_tag(data, 'logger', logger_name)
        if environment:
            set_tag(data, 'environment', environment)
        if transaction_name:
            set_tag(data, 'transaction', transaction_name)

        if release:
            # don't allow a conflicting 'release' tag
            pop_tag(data, 'release')
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )
            set_tag(data, 'sentry:release', release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # don't allow a conflicting 'dist' tag
            pop_tag(data, 'dist')
            set_tag(data, 'sentry:dist', dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # don't allow a conflicting 'user' tag
            pop_tag(data, 'user')
            set_tag(data, 'sentry:user', event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to '{{ default }}' just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right parameters.
        data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
        apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))
        hashes = event.get_hashes()
        data['hashes'] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overridden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        event_metadata = materialized_metadata['metadata']
        data.update(materialized_metadata)
        data['culprit'] = culprit

        # index components into ``Event.message``
        # See GH-3248
        event.message = self.get_search_message(event_metadata, culprit)
        received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(materialized_metadata)
        group_metadata['last_received'] = received_timestamp
        kwargs = {
            'platform': platform,
            'message': event.message,
            'culprit': culprit,
            'logger': logger_name,
            'level': LOG_LEVELS_MAP.get(level),
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': group_metadata,
        }

        if release:
            kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project, group=group, event_id=event_id)
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': EventMapping.__name__,
                    }
                )
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release': release if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

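        # Bump time-series counters for the group and project (and the release, if any).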
        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

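        # Record frequency data: which environments (and releases, when present)
        # this group is being seen in.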
        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

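        # Re-associate any user feedback already filed for this event with the
        # group and environment.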
        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # save the event unless it's been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            tagstore.delay_index_event_tags(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=event.tags,
                date_added=event.datetime,
            )

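        # Track per-user impact counters when the event could be attributed to a user.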
        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject, {'new_groups': 1}, {
                        'release_id': release.id,
                        'project_id': project.id,
                    }
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                        'project_id': project.id,
                        'release_id': release.id,
                        'environment_id': environment.id,
                    }
                )

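        # Denormalize the event's tags onto the group; failures here should not
        # abort the save.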
        safe_execute(
            Group.objects.add_tags,
            group,
            environment,
            event.get_tags(),
            _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, group=group, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        metrics.timing(
            'events.size.data.post_save',
            event.size,
            tags={'project_id': project.id}
        )

        return event

    def _get_event_user(self, project, data):
        user_data = data.get('user')
        if not user_data:
            return

        euser = EventUser(
            project_id=project.id,
            ident=user_data.get('id'),
            email=user_data.get('email'),
            username=user_data.get('username'),
            ip_address=user_data.get('ip_address'),
            name=user_data.get('name'),
        )
        euser.set_hash()
        if not euser.hash:
            return

        cache_key = u'euserid:1:{}:{}'.format(
            project.id,
            euser.hash,
        )
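        # The cache avoids re-writing an EventUser row for every event from the same user.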
        euser_id = default_cache.get(cache_key)
        if euser_id is None:
            try:
                with transaction.atomic(using=router.db_for_write(EventUser)):
                    euser.save()
            except IntegrityError:
                try:
                    euser = EventUser.objects.get(
                        project_id=project.id,
                        hash=euser.hash,
                    )
                except EventUser.DoesNotExist:
                    # why???
                    e_userid = -1
                else:
                    if euser.name != (user_data.get('name') or euser.name):
                        euser.update(
                            name=user_data['name'],
                        )
                    e_userid = euser.id
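                # cache the resolved id (or the -1 sentinel) for an hour so
                # repeat events from this user skip the insert attempt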
                default_cache.set(cache_key, e_userid, 3600)
        return euser

    def _find_hashes(self, project, hash_list):
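        # Resolve each hash string to a persisted GroupHash row, creating rows
        # that do not exist yet.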
        return [
            GroupHash.objects.get_or_create(
                project=project,
                hash=hash,
            )[0]
            for hash in hash_list
        ]

    def _save_aggregate(self, event, hashes, release, **kwargs):
        project = event.project

        # attempt to find a matching hash
        all_hashes = self._find_hashes(project, hashes)

        existing_group_id = None
        for h in all_hashes:
            if h.group_id is not None:
                existing_group_id = h.group_id
                break
            if h.group_tombstone_id is not None:
                raise HashDiscarded('Matches group tombstone %s' % h.group_tombstone_id)

        # XXX(dcramer): this has the opportunity to create duplicate groups;
        # it should be resolved by the hash merging function later, but this
        # should be better tested/reviewed
        if existing_group_id is None:
            # it's possible the release was deleted between
            # when we queried for the release and now, so
            # make sure it still exists
            first_release = kwargs.pop('first_release', None)

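            # allocate the next short id for the project and create the group
            # in a single transaction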
            with transaction.atomic():
                short_id = project.next_short_id()
                group, group_is_new = Group.objects.create(
                    project=project,
                    short_id=short_id,
                    first_release_id=Release.objects.filter(
                        id=first_release.id,
                    ).values_list('id', flat=True).first() if first_release else None,
                    **kwargs
                ), True

            metrics.incr(
                'group.created',
                skip_internal=True,
                tags={'platform': event.platform or 'unknown'}
            )

        else:
            group = Group.objects.get(id=existing_group_id)

            group_is_new = False

        # If all hashes are brand new we treat this event as new
        is_new = False
        new_hashes = [h for h in all_hashes if h.group_id is None]
        if new_hashes:
            # XXX: There is a race condition here wherein another process could
            # create a new group that is associated with one of the new hashes,
            # add some event(s) to it, and then subsequently have the hash
            # "stolen" by this process. This then "orphans" those events from
            # their "siblings" in the group we've created here. We don't have a
            # way to fix this, since we can't update the group on those hashes
            # without filtering on `group_id` (which we can't do due to query
            # planner weirdness.) For more context, see 84c6f75a and d0e22787,
            # as well as GH-5085.
            GroupHash.objects.filter(
                id__in=[h.id for h in new_hashes],
            ).exclude(
                state=GroupHash.State.LOCKED_IN_MIGRATION,
            ).update(group=group)

            if group_is_new and len(new_hashes) == len(all_hashes):
                is_new = True

        # XXX(dcramer): it's important this gets called **before** the aggregate
        # is processed as otherwise values like last_seen will get mutated
        can_sample = (
            features.has('projects:sample-events', project=project) and should_sample(
                event.data.get('received') or float(event.datetime.strftime('%s')),
                group.data.get('last_received') or float(group.last_seen.strftime('%s')),
                group.times_seen,
            )
        )

        if not is_new:
            is_regression = self._process_existing_aggregate(
                group=group,
                event=event,
                data=kwargs,
                release=release,
            )
        else:
            is_regression = False

        # Determine if we've sampled enough data to store this event
        if is_new or is_regression:
            is_sample = False
        else:
            is_sample = can_sample

        if not is_sample:
            GroupHash.record_last_processed_event_id(
                all_hashes[0].id,
                event.event_id,
            )

        return group, is_new, is_regression, is_sample

    def _handle_regression(self, group, event, release):
        if not group.is_resolved():
            return

        # we only mark it as a regression if the event's release is newer than
        # the release which we originally marked this as resolved
        elif GroupResolution.has_resolution(group, release):
            return

        elif has_pending_commit_resolution(group):
            return

        if not plugin_is_regression(group, event):
            return

        # we now think its a regression, rely on the database to validate that
        # no one beat us to this
        date = max(event.datetime, group.last_seen)
        is_regression = bool(
            Group.objects.filter(
                id=group.id,
                # ensure we can't update things if the status has been set to
                # ignored
                status__in=[GroupStatus.RESOLVED, GroupStatus.UNRESOLVED],
            ).exclude(
                # add to the regression window to account for races here
                active_at__gte=date - timedelta(seconds=5),
            ).update(
                active_at=date,
                # explicitly set last_seen here as ``is_resolved()`` looks
                # at the value
                last_seen=date,
                status=GroupStatus.UNRESOLVED
            )
        )

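        # mirror the database update on the in-memory instance so callers see
        # the unresolved state immediately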
        group.active_at = date
        group.status = GroupStatus.UNRESOLVED

        if is_regression and release:
            # resolutions are only valid if the state of the group is still
            # resolved -- if it were to change the resolution should get removed
            try:
                resolution = GroupResolution.objects.get(
                    group=group,
                )
            except GroupResolution.DoesNotExist:
                affected = False
            else:
                cursor = connection.cursor()
                # delete() API does not return affected rows
                cursor.execute("DELETE FROM sentry_groupresolution WHERE id = %s", [resolution.id])
                affected = cursor.rowcount > 0

            if affected:
                # if we had to remove the GroupResolution (i.e. we beat the
                # queue to handling this) then we need to also record
                # the corresponding event
                try:
                    activity = Activity.objects.filter(
                        group=group,
                        type=Activity.SET_RESOLVED_IN_RELEASE,
                        ident=resolution.id,
                    ).order_by('-datetime')[0]
                except IndexError:
                    # XXX: handle missing data, as it's not overly important
                    pass
                else:
                    activity.update(data={
                        'version': release.version,
                    })

        if is_regression:
            activity = Activity.objects.create(
                project=group.project,
                group=group,
                type=Activity.SET_REGRESSION,
                data={
                    'version': release.version if release else '',
                }
            )
            activity.send_notification()

            kick_off_status_syncs.apply_async(kwargs={
                'project_id': group.project_id,
                'group_id': group.id,
            })

        return is_regression

    def _process_existing_aggregate(self, group, event, data, release):
        date = max(event.datetime, group.last_seen)
        extra = {
            'last_seen': date,
            'score': ScoreClause(group),
            'data': data['data'],
        }
        if event.message and event.message != group.message:
            extra['message'] = event.message
        if group.level != data['level']:
            extra['level'] = data['level']
        if group.culprit != data['culprit']:
            extra['culprit'] = data['culprit']

        is_regression = self._handle_regression(group, event, release)

        group.last_seen = extra['last_seen']

        update_kwargs = {
            'times_seen': 1,
        }

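        # defer the times_seen increment and the column updates in ``extra`` to
        # the write buffer instead of issuing a direct UPDATE per event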
        buffer.incr(Group, update_kwargs, {
            'id': group.id,
        }, extra)

        return is_regression
Esempio n. 37
0
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None,
                   project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # we only receive a project_id in the task when we come from
    # reprocessing; otherwise it is read from the event payload.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

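    # Clear any raw event / reprocessing hint left over from an earlier
    # processing attempt for this event id.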
    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: either data is None because we did not manage
    # to fetch it from the default cache, or an empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the latter happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future-proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'post'},
            skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or \
               features.has('organizations:event-attachments', event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
Esempio n. 38
0
def _do_process_event(cache_key, start_time, event_id, process_task,
                      data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

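    # track whether anything mutated the event payload so we only re-cache it
    # later when necessary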
    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in (enhancers or ()):
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError('Event spent one hour in processing')

        retry_process_event.apply_async(
            args=(),
            kwargs={
                'process_task_name': process_task.__name__,
                'task_kwargs': {
                    'cache_key': cache_key,
                    'event_id': event_id,
                    'start_time': start_time,
                }
            },
            countdown=e.retry_after
        )
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project_id, 'Project cannot be mutated by preprocessor'
    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

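    # only check for processing issues and re-persist the payload when the
    # data was actually mutated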
    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project_id, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time,
                               event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)