def _get_grouping_variants(event):
        from sentry.grouping.api import get_grouping_variants_for_event, load_grouping_config
        from sentry.stacktraces.processing import normalize_stacktraces_for_grouping

        config = event.data.get("grouping_config")
        config = load_grouping_config(config)
        return get_grouping_variants_for_event(event, config)
Beispiel #2
0
    def tes_macos_package_in_app_detection(self):
        data = {
            "platform": "cocoa",
            "debug_meta": {"images": []},  # omitted
            "exception": {
                "values": [
                    {
                        "stacktrace": {
                            "frames": [
                                {
                                    "function": "-[CRLCrashAsyncSafeThread crash]",
                                    "package": "/Users/haza/Library/Developer/Xcode/Archives/2017-06-19/CrashProbe 19-06-2017, 08.53.xcarchive/Products/Applications/CrashProbe.app/Contents/Frameworks/CrashLib.framework/Versions/A/CrashLib",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "[KSCrash ]",
                                    "package": "/usr/lib/system/libdyld.dylib",
                                    "instruction_addr": 4295098388,
                                },
                            ]
                        },
                        "type": "NSRangeException",
                    }
                ]
            },
            "contexts": {"os": {"version": "10.12.5", "type": "os", "name": "macOS"}},
        }

        config = load_grouping_config(get_default_grouping_config_dict())
        normalize_stacktraces_for_grouping(data, grouping_config=config)

        frames = data["exception"]["values"][0]["stacktrace"]["frames"]
        assert frames[0]["in_app"] is True
        assert frames[1]["in_app"] is False
Beispiel #3
0
    def test_ios_package_in_app_detection(self):
        data = {
            "platform": "native",
            "stacktrace": {
                "frames": [
                    {
                        "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                        "instruction_addr": "0x1000",
                    },
                    {
                        "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/Frameworks/foo.dylib",
                        "instruction_addr": "0x2000",
                    },
                    {
                        "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/Frameworks/libswiftCore.dylib",
                        "instruction_addr": "0x3000",
                    },
                    {"package": "/usr/lib/whatever.dylib", "instruction_addr": "0x4000"},
                ]
            },
        }

        config = load_grouping_config(get_default_grouping_config_dict())
        normalize_stacktraces_for_grouping(data, grouping_config=config)

        # App object should be in_app
        assert data["stacktrace"]["frames"][0]["in_app"] is True
        # Framework should be in app (but optional)
        assert data["stacktrace"]["frames"][1]["in_app"] is True
        # libswift should not be system
        assert data["stacktrace"]["frames"][2]["in_app"] is False
        # Unknown object should default to not in_app
        assert data["stacktrace"]["frames"][3]["in_app"] is False
Beispiel #4
0
    def get_grouping_variants(self, force_config=None, normalize_stacktraces=False):
        """
        This is similar to `get_hashes` but will instead return the
        grouping components for each variant in a dictionary.

        If `normalize_stacktraces` is set to `True` then the event data will be
        modified for `in_app` in addition to event variants being created.  This
        means that after calling that function the event data has been modified
        in place.
        """
        from sentry.grouping.api import get_grouping_variants_for_event, load_grouping_config
        from sentry.stacktraces.processing import normalize_stacktraces_for_grouping

        # Forcing configs has two separate modes.  One is where just the
        # config ID is given in which case it's merged with the stored or
        # default config dictionary
        if force_config is not None:
            if isinstance(force_config, six.string_types):
                stored_config = self.get_grouping_config()
                config = dict(stored_config)
                config["id"] = force_config
            else:
                config = force_config

        # Otherwise we just use the same grouping config as stored.  if
        # this is None the `get_grouping_variants_for_event` will fill in
        # the default.
        else:
            config = self.data.get("grouping_config")

        config = load_grouping_config(config)
        if normalize_stacktraces:
            normalize_stacktraces_for_grouping(self.data, config)

        return get_grouping_variants_for_event(self, config)
    def create_event(self, grouping_config):
        grouping_input = dict(self.data)
        # Customize grouping config from the _grouping config
        grouping_info = grouping_input.pop("_grouping", None) or {}
        enhancement_base = grouping_info.get("enhancement_base")
        enhancements = grouping_info.get("enhancements")
        if enhancement_base or enhancements:
            enhancement_bases = [enhancement_base] if enhancement_base else []
            e = Enhancements.from_config_string(enhancements or "",
                                                bases=enhancement_bases)
            grouping_config["enhancements"] = e.dumps()

        # Normalize the event
        mgr = EventManager(data=grouping_input,
                           grouping_config=grouping_config)
        mgr.normalize()
        data = mgr.get_data()

        # Normalize the stacktrace for grouping.  This normally happens in
        # save()
        normalize_stacktraces_for_grouping(
            data, load_grouping_config(grouping_config))
        evt = eventstore.create_event(data=data)

        return evt
Beispiel #6
0
    def get_grouping_variants(self,
                              force_config=None,
                              normalize_stacktraces=False):
        """
        This is similar to `get_hashes` but will instead return the
        grouping components for each variant in a dictionary.

        If `normalize_stacktraces` is set to `True` then the event data will be
        modified for `in_app` in addition to event variants being created.  This
        means that after calling that function the event data has been modified
        in place.
        """
        from sentry.grouping.api import get_grouping_variants_for_event, load_grouping_config

        # Forcing configs has two separate modes.  One is where just the
        # config ID is given in which case it's merged with the stored or
        # default config dictionary
        if force_config is not None:
            if isinstance(force_config, str):
                stored_config = self.get_grouping_config()
                config = dict(stored_config)
                config["id"] = force_config
            else:
                config = force_config

        # Otherwise we just use the same grouping config as stored.  if
        # this is None we use the project's default config.
        else:
            config = self.get_grouping_config()

        config = load_grouping_config(config)

        if normalize_stacktraces:
            with sentry_sdk.start_span(
                    op="grouping.normalize_stacktraces_for_grouping") as span:
                span.set_tag("project", self.project_id)
                span.set_tag("event_id", self.event_id)
                self.normalize_stacktraces_for_grouping(config)

        with sentry_sdk.start_span(
                op="grouping.get_grouping_variants") as span:
            span.set_tag("project", self.project_id)
            span.set_tag("event_id", self.event_id)

            return get_grouping_variants_for_event(self, config)
Beispiel #7
0
    def ios_function_name_in_app_detection(self, function, isInApp: bool):
        data = {
            "platform": "cocoa",
            "debug_meta": {
                "images": []
            },  # omitted
            "exception": {
                "values": [{
                    "stacktrace": {
                        "frames": [
                            {
                                "function": function,
                                "package":
                                "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                "instruction_addr": 4295098388,
                            },
                            # We need two frames otherwise all frames are inApp
                            {
                                "function": "[KSCrash ]",
                                "package": "/usr/lib/system/libdyld.dylib",
                                "instruction_addr": 4295098388,
                            },
                        ]
                    },
                    "type": "NSRangeException",
                }]
            },
            "contexts": {
                "os": {
                    "version": "9.3.2",
                    "type": "os",
                    "name": "iOS"
                }
            },
        }

        config = load_grouping_config(get_default_grouping_config_dict())
        normalize_stacktraces_for_grouping(data, grouping_config=config)

        frames = data["exception"]["values"][0]["stacktrace"]["frames"]
        assert frames[0]["in_app"] is isInApp, ("For function: " + function +
                                                " expected:" + str(isInApp))
Beispiel #8
0
def test_event_hash_variant(insta_snapshot, config_name, test_name, log):
    with open(os.path.join(_fixture_path, test_name + ".json")) as f:
        input = json.load(f)

    # Customize grouping config from the _grouping config
    grouping_config = get_default_grouping_config_dict(config_name)
    grouping_info = input.pop("_grouping", None) or {}
    enhancement_base = grouping_info.get("enhancement_base")
    enhancements = grouping_info.get("enhancements")
    if enhancement_base or enhancements:
        enhancement_bases = [enhancement_base] if enhancement_base else []
        e = Enhancements.from_config_string(enhancements or "",
                                            bases=enhancement_bases)
        grouping_config["enhancements"] = e.dumps()

    # Normalize the event
    mgr = EventManager(data=input, grouping_config=grouping_config)
    mgr.normalize()
    data = mgr.get_data()

    # Normalize the stacktrace for grouping.  This normally happens in
    # save()
    normalize_stacktraces_for_grouping(data,
                                       load_grouping_config(grouping_config))
    evt = eventstore.create_event(data=data)

    # Make sure we don't need to touch the DB here because this would
    # break stuff later on.
    evt.project = None

    rv = []
    for (key, value) in sorted(evt.get_grouping_variants().items()):
        if rv:
            rv.append("-" * 74)
        rv.append("%s:" % key)
        dump_variant(value, rv, 1)
    output = "\n".join(rv)
    log(repr(evt.get_hashes()))

    assert evt.get_grouping_config() == grouping_config

    insta_snapshot(output)
Beispiel #9
0
    def tes_macos_package_in_app_detection(self):
        data = {
            "platform": "cocoa",
            "debug_meta": {
                "images": []  # omitted
            },
            "exception": {
                "values": [
                    {
                        "stacktrace": {
                            "frames": [
                                {
                                    "function": "-[CRLCrashAsyncSafeThread crash]",
                                    "package": "/Users/haza/Library/Developer/Xcode/Archives/2017-06-19/CrashProbe 19-06-2017, 08.53.xcarchive/Products/Applications/CrashProbe.app/Contents/Frameworks/CrashLib.framework/Versions/A/CrashLib",
                                    "instruction_addr": 4295098388
                                },
                                {
                                    "function": "[KSCrash ]",
                                    "package": "/usr/lib/system/libdyld.dylib",
                                    "instruction_addr": 4295098388,
                                },
                            ]
                        },
                        "type": "NSRangeException",
                    }
                ]
            },
            "contexts": {
                "os": {
                    "version": "10.12.5",
                    "type": "os",
                    "name": "macOS"
                }
            },
        }

        config = load_grouping_config(get_default_grouping_config_dict())
        normalize_stacktraces_for_grouping(data, grouping_config=config)

        frames = data['exception']['values'][0]['stacktrace']['frames']
        assert frames[0]['in_app'] is True
        assert frames[1]['in_app'] is False
    def test_ios_package_in_app_detection(self):
        data = {
            'platform': 'native',
            'stacktrace': {
                'frames': [
                    {
                        'package':
                        '/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest',
                        'instruction_addr': '0x1000'
                    },
                    {
                        'package':
                        '/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/Frameworks/foo.dylib',
                        'instruction_addr': '0x2000'
                    },
                    {
                        'package':
                        '/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/Frameworks/libswiftCore.dylib',
                        'instruction_addr': '0x3000'
                    },
                    {
                        'package': '/usr/lib/whatever.dylib',
                        'instruction_addr': '0x4000'
                    },
                ]
            }
        }

        config = load_grouping_config(get_default_grouping_config_dict())
        normalize_stacktraces_for_grouping(data, grouping_config=config)

        # App object should be in_app
        assert data['stacktrace']['frames'][0]['in_app'] is True
        # Framework should be in app (but optional)
        assert data['stacktrace']['frames'][1]['in_app'] is True
        # libswift should not be system
        assert data['stacktrace']['frames'][2]['in_app'] is False
        # Unknown object should default to not in_app
        assert data['stacktrace']['frames'][3]['in_app'] is False
Beispiel #11
0
    def test_ios_package_in_app_detection(self):
        data = {
            'platform': 'native',
            'stacktrace': {
                'frames': [
                    {
                        'package': '/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest',
                        'instruction_addr': '0x1000'
                    },
                    {
                        'package': '/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/Frameworks/foo.dylib',
                        'instruction_addr': '0x2000'
                    },
                    {
                        'package': '/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/Frameworks/libswiftCore.dylib',
                        'instruction_addr': '0x3000'
                    },
                    {
                        'package': '/usr/lib/whatever.dylib',
                        'instruction_addr': '0x4000'
                    },
                ]
            }
        }

        config = load_grouping_config(get_default_grouping_config_dict())
        normalize_stacktraces_for_grouping(data, grouping_config=config)

        # App object should be in_app
        assert data['stacktrace']['frames'][0]['in_app'] is True
        # Framework should be in app (but optional)
        assert data['stacktrace']['frames'][1]['in_app'] is True
        # libswift should not be system
        assert data['stacktrace']['frames'][2]['in_app'] is False
        # Unknown object should default to not in_app
        assert data['stacktrace']['frames'][3]['in_app'] is False
Beispiel #12
0
    def save(self,
             project_id,
             raw=False,
             assume_normalized=False,
             cache_key=None):
        """
        We re-insert events with duplicate IDs into Snuba, which is responsible
        for deduplicating events. Since deduplication in Snuba is on the primary
        key (based on event ID, project ID and day), events with same IDs are only
        deduplicated if their timestamps fall on the same day. The latest event
        always wins and overwrites the value of events received earlier in that day.

        Since we increment counters and frequencies here before events get inserted
        to eventstream these numbers may be larger than the total number of
        events if we receive duplicate event IDs that fall on the same day
        (that do not hit cache first).
        """

        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get("level")

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get("transaction")
        logger_name = data.get("logger")
        release = data.get("release")
        dist = data.get("dist")
        environment = data.get("environment")
        recorded_timestamp = data.get("timestamp")

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Right now the event type is the signal to skip the group. This
        # is going to change a lot.
        if event.get_event_type() == "transaction":
            issueless_event = True
        else:
            issueless_event = False

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, "tags", value=[])
        set_tag(data, "level", level)
        if logger_name:
            set_tag(data, "logger", logger_name)
        if environment:
            set_tag(data, "environment", environment)
        if transaction_name:
            set_tag(data, "transaction", transaction_name)

        if release:
            # dont allow a conflicting 'release' tag
            pop_tag(data, "release")
            release = Release.get_or_create(project=project,
                                            version=release,
                                            date_added=date)
            set_tag(data, "sentry:release", release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # dont allow a conflicting 'dist' tag
            pop_tag(data, "dist")
            set_tag(data, "sentry:dist", dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            pop_tag(data, "user")
            set_tag(data, "sentry:user", event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags,
                                      event,
                                      _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to `'{{ default }}' just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right parameters.
        data["fingerprint"] = data.get("fingerprint") or ["{{ default }}"]
        apply_server_fingerprinting(
            data, get_fingerprinting_config_for_project(project))

        # Here we try to use the grouping config that was requested in the
        # event.  If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = event.get_hashes()
        except GroupingConfigNotFound:
            data["grouping_config"] = get_grouping_config_dict_for_project(
                project)
            hashes = event.get_hashes()

        data["hashes"] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overridden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        data.update(materialized_metadata)
        data["culprit"] = culprit

        received_timestamp = event.data.get("received") or float(
            event.datetime.strftime("%s"))

        if not issueless_event:
            # The group gets the same metadata as the event when it's flushed but
            # additionally the `last_received` key is set.  This key is used by
            # _save_aggregate.
            group_metadata = dict(materialized_metadata)
            group_metadata["last_received"] = received_timestamp
            kwargs = {
                "platform": platform,
                "message": event.search_message,
                "culprit": culprit,
                "logger": logger_name,
                "level": LOG_LEVELS_MAP.get(level),
                "last_seen": date,
                "first_seen": date,
                "active_at": date,
                "data": group_metadata,
            }

            if release:
                kwargs["first_release"] = release

            try:
                group, is_new, is_regression = self._save_aggregate(
                    event=event, hashes=hashes, release=release, **kwargs)
            except HashDiscarded:
                event_discarded.send_robust(project=project,
                                            sender=EventManager)

                metrics.incr(
                    "events.discarded",
                    skip_internal=True,
                    tags={
                        "organization_id": project.organization_id,
                        "platform": platform
                    },
                )
                raise
            else:
                event_saved.send_robust(project=project,
                                        event_size=event.size,
                                        sender=EventManager)
            event.group = group
        else:
            group = None
            is_new = False
            is_regression = False
            event_saved.send_robust(project=project,
                                    event_size=event.size,
                                    sender=EventManager)

        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        environment = Environment.get_or_create(project=project,
                                                name=environment)

        if group:
            group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
                group_id=group.id,
                environment_id=environment.id,
                defaults={"first_release": release if release else None},
            )
        else:
            is_new_group_environment = False

        if release:
            ReleaseEnvironment.get_or_create(project=project,
                                             release=release,
                                             environment=environment,
                                             datetime=date)

            ReleaseProjectEnvironment.get_or_create(project=project,
                                                    release=release,
                                                    environment=environment,
                                                    datetime=date)

            if group:
                grouprelease = GroupRelease.get_or_create(
                    group=group,
                    release=release,
                    environment=environment,
                    datetime=date)

        counters = [(tsdb.models.project, project.id)]

        if group:
            counters.append((tsdb.models.group, group.id))

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters,
                        timestamp=event.datetime,
                        environment_id=environment.id)

        frequencies = []

        if group:
            frequencies.append((tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1
                }
            }))

            if release:
                frequencies.append((tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1
                    }
                }))
        if frequencies:
            tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        if group:
            UserReport.objects.filter(project=project,
                                      event_id=event_id).update(
                                          group=group, environment=environment)

        # Enusre the _metrics key exists. This is usually created during
        # and prefilled with ingestion sizes.
        event_metrics = event.data.get("_metrics") or {}
        event.data["_metrics"] = event_metrics

        # Capture the actual size that goes into node store.
        event_metrics["bytes.stored.event"] = len(
            json.dumps(dict(event.data.items())))

        # Load attachments first, but persist them at the very last after
        # posting to eventstream to make sure all counters and eventstream are
        # incremented for sure.
        attachments = self.get_attachments(cache_key, event)
        for attachment in attachments:
            key = "bytes.stored.%s" % (attachment.type, )
            event_metrics[key] = (event_metrics.get(key) or 0) + len(
                attachment.data)

        # Write the event to Nodestore
        event.data.save()

        if event_user:
            counters = [(tsdb.models.users_affected_by_project, project.id,
                         (event_user.tag_value, ))]

            if group:
                counters.append((tsdb.models.users_affected_by_group, group.id,
                                 (event_user.tag_value, )))

            tsdb.record_multi(counters,
                              timestamp=event.datetime,
                              environment_id=environment.id)

        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {
                        "release_id": release.id,
                        "project_id": project.id
                    },
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": release.id,
                        "environment_id": environment.id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project,
                                                 event=event,
                                                 sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        # Do this last to ensure signals get emitted even if connection to the
        # file store breaks temporarily.
        self.save_attachments(attachments, event)

        metric_tags = {"from_relay": "_relay_processed" in self._data}

        metrics.timing("events.latency",
                       received_timestamp - recorded_timestamp,
                       tags=metric_tags)
        metrics.timing("events.size.data.post_save",
                       event.size,
                       tags=metric_tags)
        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(self._data.get("errors") or ()),
            tags=metric_tags,
        )

        return event
Beispiel #13
0
    def save(self,
             project_id,
             raw=False,
             assume_normalized=False,
             cache_key=None):
        """
        After normalizing and processing an event, save adjacent models such as
        releases and environments to postgres and write the event into
        eventstream. From there it will be picked up by Snuba and
        post-processing.

        We re-insert events with duplicate IDs into Snuba, which is responsible
        for deduplicating events. Since deduplication in Snuba is on the primary
        key (based on event ID, project ID and day), events with same IDs are only
        deduplicated if their timestamps fall on the same day. The latest event
        always wins and overwrites the value of events received earlier in that day.

        Since we increment counters and frequencies here before events get inserted
        to eventstream these numbers may be larger than the total number of
        events if we receive duplicate event IDs that fall on the same day
        (that do not hit cache first).
        """

        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        with metrics.timer("event_manager.save.project.get_from_cache"):
            project = Project.objects.get_from_cache(id=project_id)

        with metrics.timer("event_manager.save.organization.get_from_cache"):
            project._organization_cache = Organization.objects.get_from_cache(
                id=project.organization_id)

        job = {"data": self._data, "project_id": project_id, "raw": raw}
        jobs = [job]
        projects = {project.id: project}

        _pull_out_data(jobs, projects)

        # Right now the event type is the signal to skip the group. This
        # is going to change a lot.
        if job["event"].get_event_type() == "transaction":
            issueless_event = True
        else:
            issueless_event = False

        _get_or_create_release_many(jobs, projects)

        # XXX: remove
        if job["dist"] and job["release"]:
            job["dist"] = job["release"].add_dist(job["dist"],
                                                  job["event"].datetime)
            # dont allow a conflicting 'dist' tag
            pop_tag(job["data"], "dist")
            set_tag(job["data"], "sentry:dist", job["dist"].name)
        else:
            job["dist"] = None

        _get_event_user_many(jobs, projects)

        with metrics.timer("event_manager.load_grouping_config"):
            # At this point we want to normalize the in_app values in case the
            # clients did not set this appropriately so far.
            grouping_config = load_grouping_config(
                get_grouping_config_dict_for_event_data(job["data"], project))

        with metrics.timer("event_manager.normalize_stacktraces_for_grouping"):
            normalize_stacktraces_for_grouping(job["data"], grouping_config)

        _derive_plugin_tags_many(jobs, projects)
        _derive_interface_tags_many(jobs)

        with metrics.timer("event_manager.apply_server_fingerprinting"):
            # The active grouping config was put into the event in the
            # normalize step before.  We now also make sure that the
            # fingerprint was set to `'{{ default }}' just in case someone
            # removed it from the payload.  The call to get_hashes will then
            # look at `grouping_config` to pick the right parameters.
            job["data"]["fingerprint"] = job["data"].get("fingerprint") or [
                "{{ default }}"
            ]
            apply_server_fingerprinting(
                job["data"], get_fingerprinting_config_for_project(project))

        with metrics.timer("event_manager.event.get_hashes"):
            # Here we try to use the grouping config that was requested in the
            # event.  If that config has since been deleted (because it was an
            # experimental grouping config) we fall back to the default.
            try:
                hashes = job["event"].get_hashes()
            except GroupingConfigNotFound:
                job["data"][
                    "grouping_config"] = get_grouping_config_dict_for_project(
                        project)
                hashes = job["event"].get_hashes()

        job["data"]["hashes"] = hashes

        _materialize_metadata_many(jobs)

        job["received_timestamp"] = received_timestamp = job["event"].data.get(
            "received") or float(job["event"].datetime.strftime("%s"))

        if not issueless_event:
            # The group gets the same metadata as the event when it's flushed but
            # additionally the `last_received` key is set.  This key is used by
            # _save_aggregate.
            group_metadata = dict(job["materialized_metadata"])
            group_metadata["last_received"] = received_timestamp
            kwargs = {
                "platform": job["platform"],
                "message": job["event"].search_message,
                "culprit": job["culprit"],
                "logger": job["logger_name"],
                "level": LOG_LEVELS_MAP.get(job["level"]),
                "last_seen": job["event"].datetime,
                "first_seen": job["event"].datetime,
                "active_at": job["event"].datetime,
                "data": group_metadata,
            }

            if job["release"]:
                kwargs["first_release"] = job["release"]

            try:
                job["group"], job["is_new"], job[
                    "is_regression"] = _save_aggregate(event=job["event"],
                                                       hashes=hashes,
                                                       release=job["release"],
                                                       **kwargs)
            except HashDiscarded:
                event_discarded.send_robust(project=project,
                                            sender=EventManager)

                metrics.incr(
                    "events.discarded",
                    skip_internal=True,
                    tags={
                        "organization_id": project.organization_id,
                        "platform": job["platform"]
                    },
                )
                raise
            job["event"].group = job["group"]
        else:
            job["group"] = None
            job["is_new"] = False
            job["is_regression"] = False

        _send_event_saved_signal_many(jobs, projects)

        # store a reference to the group id to guarantee validation of isolation
        # XXX(markus): No clue what this does
        job["event"].data.bind_ref(job["event"])

        _get_or_create_environment_many(jobs, projects)

        if job["group"]:
            group_environment, job[
                "is_new_group_environment"] = GroupEnvironment.get_or_create(
                    group_id=job["group"].id,
                    environment_id=job["environment"].id,
                    defaults={"first_release": job["release"] or None},
                )
        else:
            job["is_new_group_environment"] = False

        _get_or_create_release_associated_models(jobs, projects)

        if job["release"] and job["group"]:
            job["grouprelease"] = GroupRelease.get_or_create(
                group=job["group"],
                release=job["release"],
                environment=job["environment"],
                datetime=job["event"].datetime,
            )

        _tsdb_record_all_metrics(jobs)

        if job["group"]:
            UserReport.objects.filter(project=project,
                                      event_id=job["event"].event_id).update(
                                          group=job["group"],
                                          environment=job["environment"])

        # Enusre the _metrics key exists. This is usually created during
        # and prefilled with ingestion sizes.
        event_metrics = job["event"].data.get("_metrics") or {}
        job["event"].data["_metrics"] = event_metrics

        # Capture the actual size that goes into node store.
        event_metrics["bytes.stored.event"] = len(
            json.dumps(dict(job["event"].data.items())))

        if not issueless_event:
            # Load attachments first, but persist them at the very last after
            # posting to eventstream to make sure all counters and eventstream are
            # incremented for sure.
            attachments = get_attachments(cache_key, job["event"])
            for attachment in attachments:
                key = "bytes.stored.%s" % (attachment.type, )
                event_metrics[key] = (event_metrics.get(key) or 0) + len(
                    attachment.data)

        _nodestore_save_many(jobs)

        if job["release"] and not issueless_event:
            if job["is_new"]:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {
                        "release_id": job["release"].id,
                        "project_id": project.id
                    },
                )
            if job["is_new_group_environment"]:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": job["release"].id,
                        "environment_id": job["environment"].id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=job["event"].datetime)
                first_event_received.send_robust(project=project,
                                                 event=job["event"],
                                                 sender=Project)

        _eventstream_insert_many(jobs)

        if not issueless_event:
            # Do this last to ensure signals get emitted even if connection to the
            # file store breaks temporarily.
            save_attachments(attachments, job["event"])

        metric_tags = {"from_relay": "_relay_processed" in job["data"]}

        metrics.timing("events.latency",
                       received_timestamp - job["recorded_timestamp"],
                       tags=metric_tags)
        metrics.timing("events.size.data.post_save",
                       job["event"].size,
                       tags=metric_tags)
        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(job["data"].get("errors") or ()),
            tags=metric_tags,
        )

        self._data = job["event"].data.data
        return job["event"]
Beispiel #14
0
    def save(self, project_id, raw=False, assume_normalized=False):
        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            # Make sure we cache on the project before returning
            event._project_cache = project
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': data['event_id'],
                    'project_id': project.id,
                    'model': Event.__name__,
                }
            )
            return event

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get('level')

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get('transaction')
        logger_name = data.get('logger')
        release = data.get('release')
        dist = data.get('dist')
        environment = data.get('environment')
        recorded_timestamp = data.get('timestamp')

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, 'tags', value=[])
        set_tag(data, 'level', level)
        if logger_name:
            set_tag(data, 'logger', logger_name)
        if environment:
            set_tag(data, 'environment', environment)
        if transaction_name:
            set_tag(data, 'transaction', transaction_name)

        if release:
            # dont allow a conflicting 'release' tag
            pop_tag(data, 'release')
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )
            set_tag(data, 'sentry:release', release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # dont allow a conflicting 'dist' tag
            pop_tag(data, 'dist')
            set_tag(data, 'sentry:dist', dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            pop_tag(data, 'user')
            set_tag(data, 'sentry:user', event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to `'{{ default }}' just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right paramters.
        data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
        apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))
        hashes = event.get_hashes()
        data['hashes'] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overriden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        event_metadata = materialized_metadata['metadata']
        data.update(materialized_metadata)
        data['culprit'] = culprit

        # index components into ``Event.message``
        # See GH-3248
        event.message = self.get_search_message(event_metadata, culprit)
        received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(materialized_metadata)
        group_metadata['last_received'] = received_timestamp
        kwargs = {
            'platform': platform,
            'message': event.message,
            'culprit': culprit,
            'logger': logger_name,
            'level': LOG_LEVELS_MAP.get(level),
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': group_metadata,
        }

        if release:
            kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project, group=group, event_id=event_id)
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': EventMapping.__name__,
                    }
                )
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release': release if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            tagstore.delay_index_event_tags(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=event.tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject, {'new_groups': 1}, {
                        'release_id': release.id,
                        'project_id': project.id,
                    }
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                        'project_id': project.id,
                        'release_id': release.id,
                        'environment_id': environment.id,
                    }
                )

        safe_execute(
            Group.objects.add_tags,
            group,
            environment,
            event.get_tags(),
            _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, group=group, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        metrics.timing(
            'events.size.data.post_save',
            event.size,
            tags={'project_id': project.id}
        )

        return event
Beispiel #15
0
    def test_ios_function_name_in_app_detection(self):
        data = {
            "platform": "cocoa",
            "debug_meta": {
                "images": []  # omitted
            },
            "exception": {
                "values": [
                    {
                        "stacktrace": {
                            "frames": [
                                {
                                    "function": "+[RNSentry ]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "+[SentryClient ]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "kscrash_foobar",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "kscm_foobar",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "+[KSCrash ]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "+[KSCrash]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "+[KSCrashy]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                            ]
                        },
                        "type": "NSRangeException",
                    }
                ]
            },
            "contexts": {
                "os": {
                    "version": "9.3.2",
                    "type": "os",
                    "name": "iOS"
                }
            }
        }

        config = load_grouping_config(get_default_grouping_config_dict())
        normalize_stacktraces_for_grouping(data, grouping_config=config)

        frames = data['exception']['values'][0]['stacktrace']['frames']
        assert frames[0]['in_app'] is False
        assert frames[1]['in_app'] is False
        assert frames[2]['in_app'] is False
        assert frames[3]['in_app'] is False
        assert frames[4]['in_app'] is False
        assert frames[5]['in_app'] is True
        assert frames[6]['in_app'] is True
Beispiel #16
0
    def save(self, project_id, raw=False, assume_normalized=False):
        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            # Make sure we cache on the project before returning
            event._project_cache = project
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': data['event_id'],
                    'project_id': project.id,
                    'model': Event.__name__,
                }
            )
            return event

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get('level')

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get('transaction')
        logger_name = data.get('logger')
        release = data.get('release')
        dist = data.get('dist')
        environment = data.get('environment')
        recorded_timestamp = data.get('timestamp')

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, 'tags', value=[])
        set_tag(data, 'level', level)
        if logger_name:
            set_tag(data, 'logger', logger_name)
        if environment:
            set_tag(data, 'environment', environment)
        if transaction_name:
            set_tag(data, 'transaction', transaction_name)

        if release:
            # dont allow a conflicting 'release' tag
            pop_tag(data, 'release')
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )
            set_tag(data, 'sentry:release', release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # dont allow a conflicting 'dist' tag
            pop_tag(data, 'dist')
            set_tag(data, 'sentry:dist', dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            pop_tag(data, 'user')
            set_tag(data, 'sentry:user', event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to `'{{ default }}' just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right paramters.
        data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
        apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))

        # Here we try to use the grouping config that was requested in the
        # event.  If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = event.get_hashes()
        except GroupingConfigNotFound:
            data['grouping_config'] = get_grouping_config_dict_for_project(project)
            hashes = event.get_hashes()

        data['hashes'] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overriden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        event_metadata = materialized_metadata['metadata']
        data.update(materialized_metadata)
        data['culprit'] = culprit

        # index components into ``Event.message``
        # See GH-3248
        event.message = self.get_search_message(event_metadata, culprit)
        received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(materialized_metadata)
        group_metadata['last_received'] = received_timestamp
        kwargs = {
            'platform': platform,
            'message': event.message,
            'culprit': culprit,
            'logger': logger_name,
            'level': LOG_LEVELS_MAP.get(level),
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': group_metadata,
        }

        if release:
            kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project, group=group, event_id=event_id)
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': EventMapping.__name__,
                    }
                )
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release': release if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # Update any event attachment that arrived before the event group was defined.
        EventAttachment.objects.filter(
            project_id=project.id,
            event_id=event_id,
        ).update(
            group_id=group.id,
        )

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            tagstore.delay_index_event_tags(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=event.tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject, {'new_groups': 1}, {
                        'release_id': release.id,
                        'project_id': project.id,
                    }
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                        'project_id': project.id,
                        'release_id': release.id,
                        'environment_id': environment.id,
                    }
                )

        safe_execute(
            Group.objects.add_tags,
            group,
            environment,
            event.get_tags(),
            _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, group=group, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        metrics.timing(
            'events.size.data.post_save',
            event.size,
            tags={'project_id': project.id}
        )

        return event
Beispiel #17
0
    def save(self, project_id, raw=False, assume_normalized=False, start_time=None, cache_key=None):
        """
        After normalizing and processing an event, save adjacent models such as
        releases and environments to postgres and write the event into
        eventstream. From there it will be picked up by Snuba and
        post-processing.

        We re-insert events with duplicate IDs into Snuba, which is responsible
        for deduplicating events. Since deduplication in Snuba is on the primary
        key (based on event ID, project ID and day), events with same IDs are only
        deduplicated if their timestamps fall on the same day. The latest event
        always wins and overwrites the value of events received earlier in that day.

        Since we increment counters and frequencies here before events get inserted
        to eventstream these numbers may be larger than the total number of
        events if we receive duplicate event IDs that fall on the same day
        (that do not hit cache first).
        """

        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize(project_id=project_id)
            self._normalized = True

        with metrics.timer("event_manager.save.project.get_from_cache"):
            project = Project.objects.get_from_cache(id=project_id)

        projects = {project.id: project}

        if self._data.get("type") == "transaction":
            self._data["project"] = int(project_id)
            job = {"data": self._data, "start_time": start_time}
            jobs = save_transaction_events([job], projects)
            return jobs[0]["event"]

        with metrics.timer("event_manager.save.organization.get_from_cache"):
            project._organization_cache = Organization.objects.get_from_cache(
                id=project.organization_id
            )

        job = {"data": self._data, "project_id": project_id, "raw": raw, "start_time": start_time}
        jobs = [job]

        _pull_out_data(jobs, projects)
        _get_or_create_release_many(jobs, projects)
        _get_event_user_many(jobs, projects)

        job["project_key"] = None
        if job["key_id"] is not None:
            with metrics.timer("event_manager.load_project_key"):
                try:
                    job["project_key"] = ProjectKey.objects.get_from_cache(id=job["key_id"])
                except ProjectKey.DoesNotExist:
                    pass

        with metrics.timer("event_manager.load_grouping_config"):
            # At this point we want to normalize the in_app values in case the
            # clients did not set this appropriately so far.
            grouping_config = load_grouping_config(
                get_grouping_config_dict_for_event_data(job["data"], project)
            )

        with metrics.timer("event_manager.normalize_stacktraces_for_grouping"):
            normalize_stacktraces_for_grouping(job["data"], grouping_config)

        _derive_plugin_tags_many(jobs, projects)
        _derive_interface_tags_many(jobs)

        with metrics.timer("event_manager.apply_server_fingerprinting"):
            # The active grouping config was put into the event in the
            # normalize step before.  We now also make sure that the
            # fingerprint was set to `'{{ default }}' just in case someone
            # removed it from the payload.  The call to get_hashes will then
            # look at `grouping_config` to pick the right parameters.
            job["data"]["fingerprint"] = job["data"].get("fingerprint") or ["{{ default }}"]
            apply_server_fingerprinting(job["data"], get_fingerprinting_config_for_project(project))

        with metrics.timer("event_manager.event.get_hashes"):
            # Here we try to use the grouping config that was requested in the
            # event.  If that config has since been deleted (because it was an
            # experimental grouping config) we fall back to the default.
            try:
                hashes = job["event"].get_hashes()
            except GroupingConfigNotFound:
                job["data"]["grouping_config"] = get_grouping_config_dict_for_project(project)
                hashes = job["event"].get_hashes()

        job["data"]["hashes"] = hashes

        _materialize_metadata_many(jobs)

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(job["materialized_metadata"])
        group_metadata["last_received"] = job["received_timestamp"]
        kwargs = {
            "platform": job["platform"],
            "message": job["event"].search_message,
            "culprit": job["culprit"],
            "logger": job["logger_name"],
            "level": LOG_LEVELS_MAP.get(job["level"]),
            "last_seen": job["event"].datetime,
            "first_seen": job["event"].datetime,
            "active_at": job["event"].datetime,
            "data": group_metadata,
        }

        if job["release"]:
            kwargs["first_release"] = job["release"]

        # Load attachments first, but persist them at the very last after
        # posting to eventstream to make sure all counters and eventstream are
        # incremented for sure. Also wait for grouping to remove attachments
        # based on the group counter.
        with metrics.timer("event_manager.get_attachments"):
            attachments = get_attachments(cache_key, job)

        try:
            job["group"], job["is_new"], job["is_regression"] = _save_aggregate(
                event=job["event"], hashes=hashes, release=job["release"], **kwargs
            )
        except HashDiscarded:
            discard_event(job, attachments)
            raise

        job["event"].group = job["group"]

        # store a reference to the group id to guarantee validation of isolation
        # XXX(markus): No clue what this does
        job["event"].data.bind_ref(job["event"])

        _get_or_create_environment_many(jobs, projects)

        if job["group"]:
            group_environment, job["is_new_group_environment"] = GroupEnvironment.get_or_create(
                group_id=job["group"].id,
                environment_id=job["environment"].id,
                defaults={"first_release": job["release"] or None},
            )
        else:
            job["is_new_group_environment"] = False

        _get_or_create_release_associated_models(jobs, projects)

        if job["release"] and job["group"]:
            job["grouprelease"] = GroupRelease.get_or_create(
                group=job["group"],
                release=job["release"],
                environment=job["environment"],
                datetime=job["event"].datetime,
            )

        _tsdb_record_all_metrics(jobs)

        if job["group"]:
            UserReport.objects.filter(project=project, event_id=job["event"].event_id).update(
                group=job["group"], environment=job["environment"]
            )

        with metrics.timer("event_manager.filter_attachments_for_group"):
            attachments = filter_attachments_for_group(attachments, job)

        # XXX: DO NOT MUTATE THE EVENT PAYLOAD AFTER THIS POINT
        _materialize_event_metrics(jobs)

        for attachment in attachments:
            key = "bytes.stored.%s" % (attachment.type,)
            old_bytes = job["event_metrics"].get(key) or 0
            job["event_metrics"][key] = old_bytes + attachment.size

        _nodestore_save_many(jobs)
        save_unprocessed_event(project, event_id=job["event"].event_id)

        if job["release"]:
            if job["is_new"]:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {"release_id": job["release"].id, "project_id": project.id},
                )
            if job["is_new_group_environment"]:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": job["release"].id,
                        "environment_id": job["environment"].id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=job["event"].datetime)
                first_event_received.send_robust(
                    project=project, event=job["event"], sender=Project
                )

        _eventstream_insert_many(jobs)

        # Do this last to ensure signals get emitted even if connection to the
        # file store breaks temporarily.
        with metrics.timer("event_manager.save_attachments"):
            save_attachments(cache_key, attachments, job)

        metric_tags = {"from_relay": "_relay_processed" in job["data"]}

        metrics.timing(
            "events.latency",
            job["received_timestamp"] - job["recorded_timestamp"],
            tags=metric_tags,
        )
        metrics.timing("events.size.data.post_save", job["event"].size, tags=metric_tags)
        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(job["data"].get("errors") or ()),
            tags=metric_tags,
        )

        _track_outcome_accepted_many(jobs)

        self._data = job["event"].data.data
        return job["event"]
Beispiel #18
0
    def test_ios_function_name_in_app_detection(self):
        data = {
            "platform": "cocoa",
            "debug_meta": {"images": []},  # omitted
            "exception": {
                "values": [
                    {
                        "stacktrace": {
                            "frames": [
                                {
                                    "function": "+[RNSentry ]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "+[SentryClient ]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "kscrash_foobar",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "kscm_foobar",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "+[KSCrash ]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "+[KSCrash]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                                {
                                    "function": "+[KSCrashy]",
                                    "package": "/var/containers/Bundle/Application/B33C37A8-F933-4B6B-9FFA-152282BFDF13/SentryTest.app/SentryTest",
                                    "instruction_addr": 4295098388,
                                },
                            ]
                        },
                        "type": "NSRangeException",
                    }
                ]
            },
            "contexts": {"os": {"version": "9.3.2", "type": "os", "name": "iOS"}},
        }

        config = load_grouping_config(get_default_grouping_config_dict())
        normalize_stacktraces_for_grouping(data, grouping_config=config)

        frames = data["exception"]["values"][0]["stacktrace"]["frames"]
        assert frames[0]["in_app"] is False
        assert frames[1]["in_app"] is False
        assert frames[2]["in_app"] is False
        assert frames[3]["in_app"] is False
        assert frames[4]["in_app"] is False
        assert frames[5]["in_app"] is True
        assert frames[6]["in_app"] is True
def _project_has_hierarchical_grouping(project):
    config_dict = get_grouping_config_dict_for_project(project)
    config = load_grouping_config(config_dict)
    return config.initial_context["hierarchical_grouping"]
Beispiel #20
0
    x for x in os.environ.get("SENTRY_TEST_GROUPING_DELETE_KEYWORDS",
                              "").lower().split(",") if x
]


class CategorizationInput:
    def __init__(self, filename):
        self.filename = filename

    @cached_property
    def data(self):
        with open(os.path.join(_fixture_path, self.filename)) as f:
            return _pre_scrub_event(json.load(f))


CONFIG = load_grouping_config(
    get_default_grouping_config_dict("mobile:2021-02-12"))


def get_stacktrace_render(data):
    """
    Platform agnostic stacktrace renderer with annotations
    """
    rv = []
    for exc in get_path(data, "exception", "values", filter=True) or ():
        ty = get_path(exc, "type") or "_"
        value = get_path(exc, "value") or "_"
        thread_id = get_path(exc, "id") or "_"
        crashed = get_path(exc, "crashed", default="_")
        rv.append("")
        rv.append("")
        rv.append(f"{ty}:{value} (thread_id:{thread_id}, crashed:{crashed})")