Example #1
 def __init__(self,
              data,
              version='5',
              project=None,
              grouping_config=None,
              client_ip=None,
              user_agent=None,
              auth=None,
              key=None,
              content_encoding=None,
              is_renormalize=False,
              remove_other=None):
     self._data = _decode_event(data, content_encoding=content_encoding)
     self.version = version
     self._project = project
     if grouping_config is None and project is not None:
         grouping_config = get_grouping_config_dict_for_project(
             self._project)
     self._grouping_config = grouping_config
     self._client_ip = client_ip
     self._user_agent = user_agent
     self._auth = auth
     self._key = key
     self._is_renormalize = is_renormalize
     self._remove_other = remove_other
     self._normalized = False
Example #2
 def __init__(
     self,
     data,
     version='5',
     project=None,
     grouping_config=None,
     client_ip=None,
     user_agent=None,
     auth=None,
     key=None,
     content_encoding=None,
     is_renormalize=False,
     remove_other=None,
     relay_config=None
 ):
     self._data = _decode_event(data, content_encoding=content_encoding)
     self.version = version
     self._project = project
      # if not explicitly specified, try to get the grouping config from relay_config
     if grouping_config is None and relay_config is not None:
         config = relay_config.config
         grouping_config = config.get('grouping_config')
      # if we still don't have a grouping config, also try the project
     if grouping_config is None and project is not None:
         grouping_config = get_grouping_config_dict_for_project(self._project)
     self._grouping_config = grouping_config
     self._client_ip = client_ip
     self._user_agent = user_agent
     self._auth = auth
     self._key = key
     self._is_renormalize = is_renormalize
     self._remove_other = remove_other
     self._normalized = False
     self.relay_config = relay_config
Example #3
 def __init__(
     self,
     data,
     version='5',
     project=None,
     grouping_config=None,
     client_ip=None,
     user_agent=None,
     auth=None,
     key=None,
     content_encoding=None,
     is_renormalize=False,
     remove_other=None
 ):
     self._data = _decode_event(data, content_encoding=content_encoding)
     self.version = version
     self._project = project
     if grouping_config is None and project is not None:
         grouping_config = get_grouping_config_dict_for_project(self._project)
     self._grouping_config = grouping_config
     self._client_ip = client_ip
     self._user_agent = user_agent
     self._auth = auth
     self._key = key
     self._is_renormalize = is_renormalize
     self._remove_other = remove_other
     self._normalized = False
Example #4
 def __init__(
     self,
     data,
     version="5",
     project=None,
     grouping_config=None,
     client_ip=None,
     user_agent=None,
     auth=None,
     key=None,
     content_encoding=None,
     is_renormalize=False,
     remove_other=None,
     project_config=None,
     sent_at=None,
 ):
     self._data = CanonicalKeyDict(data)
     self.version = version
     self._project = project
      # if not explicitly specified, try to get the grouping config from project_config
     if grouping_config is None and project_config is not None:
         config = project_config.config
         grouping_config = config.get("grouping_config")
      # if we still don't have a grouping config, also try the project
     if grouping_config is None and project is not None:
         grouping_config = get_grouping_config_dict_for_project(self._project)
     self._grouping_config = grouping_config
     self._client_ip = client_ip
     self._user_agent = user_agent
     self._auth = auth
     self._key = key
     self._is_renormalize = is_renormalize
     self._remove_other = remove_other
     self._normalized = False
     self.project_config = project_config
     self.sent_at = sent_at
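
The four constructors above resolve the grouping configuration the same way: an explicitly passed grouping_config wins, then the one embedded in the per-project config object (relay_config / project_config), and finally the project-wide default from get_grouping_config_dict_for_project. Below is a minimal standalone sketch of that fallback order, using plain stand-in objects instead of real Sentry models; the ProjectConfigStub class, _default_for_project helper, and the config id strings are illustrative only.

class ProjectConfigStub:
    """Stand-in for the project_config/relay_config objects used above."""
    def __init__(self, config):
        self.config = config  # plain dict, mirroring project_config.config


def _default_for_project(project):
    # stand-in for get_grouping_config_dict_for_project(project)
    return {"id": "newstyle:2019-05-08"}


def resolve_grouping_config(grouping_config, project=None, project_config=None):
    # if not explicitly specified, try the grouping config embedded in project_config
    if grouping_config is None and project_config is not None:
        grouping_config = project_config.config.get("grouping_config")
    # if we still don't have one, fall back to the project-wide default
    if grouping_config is None and project is not None:
        grouping_config = _default_for_project(project)
    return grouping_config


assert resolve_grouping_config({"id": "legacy"}, project=object())["id"] == "legacy"
assert resolve_grouping_config(
    None, project_config=ProjectConfigStub({"grouping_config": {"id": "embedded"}})
)["id"] == "embedded"
assert resolve_grouping_config(None, project=object())["id"] == "newstyle:2019-05-08"
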
Example #5
def get_project_config(project, full_config=True, project_keys=None):
    """
    Constructs the ProjectConfig information.

    :param project: The project to load configuration for. Ensure that
        organization is bound on this object; otherwise it will be loaded from
        the database.
    :param full_config: True if the full config is required, False if only
        the restricted config (for external relays) is required
        (default True, i.e. full configuration).
    :param project_keys: Pre-fetched project keys for performance. However, if
        no project keys are provided, it is assumed that the config does not
        need to contain auth information (this is the case when used in
        Python's StoreView).

    :return: a ProjectConfig object for the given project
    """
    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    if project.status != ObjectStatus.VISIBLE:
        return ProjectConfig(project, disabled=True)

    public_keys = get_public_key_configs(project,
                                         full_config,
                                         project_keys=project_keys)

    with Hub.current.start_span(op="get_public_config"):
        now = datetime.utcnow().replace(tzinfo=utc)
        cfg = {
            "disabled": False,
            "slug": project.slug,
            "lastFetch": now,
            "lastChange": project.get_option("sentry:relay-rev-lastchange",
                                             now),
            "rev": project.get_option("sentry:relay-rev",
                                      uuid.uuid4().hex),
            "publicKeys": public_keys,
            "config": {
                "allowedDomains":
                list(get_origins(project)),
                "trustedRelays": [
                    r["public_key"] for r in project.organization.get_option(
                        "sentry:trusted-relays", []) if r
                ],
                "piiConfig":
                get_pii_config(project),
                "datascrubbingSettings":
                get_datascrubbing_settings(project),
            },
            "organizationId": project.organization_id,
            "projectId":
            project.id,  # XXX: Unused by Relay, required by Python store
        }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    with Hub.current.start_span(op="get_filter_settings"):
        cfg["config"]["filterSettings"] = get_filter_settings(project)
    with Hub.current.start_span(op="get_grouping_config_dict_for_project"):
        cfg["config"]["groupingConfig"] = get_grouping_config_dict_for_project(
            project)
    with Hub.current.start_span(op="get_event_retention"):
        cfg["config"]["eventRetention"] = quotas.get_event_retention(
            project.organization)
    with Hub.current.start_span(op="get_all_quotas"):
        cfg["config"]["quotas"] = get_quotas(project, keys=project_keys)

    return ProjectConfig(project, **cfg)
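
A hedged usage sketch of the function above: the restricted configuration (full_config=False) is what gets handed to external Relays, while the full configuration additionally carries filter settings, the grouping config, event retention, and quotas. The import paths and the way project/project_keys are obtained are assumptions based on how the surrounding examples load them, not something this listing shows; project_id is assumed to be in scope.

from sentry.models import Project, ProjectKey       # assumed import path
from sentry.relay.config import get_project_config  # assumed import path

project = Project.objects.get_from_cache(id=project_id)
project_keys = ProjectKey.objects.filter(project=project).all()

# Restricted config, safe to expose to an external Relay.
restricted = get_project_config(project, full_config=False, project_keys=project_keys)

# Full config, used internally; adds filterSettings, groupingConfig,
# eventRetention and quotas on top of the restricted payload.
full = get_project_config(project, full_config=True, project_keys=project_keys)
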
Example #6
    def save(self,
             project_id,
             raw=False,
             assume_normalized=False,
             cache_key=None):
        """
        After normalizing and processing an event, save adjacent models such as
        releases and environments to postgres and write the event into
        eventstream. From there it will be picked up by Snuba and
        post-processing.

        We re-insert events with duplicate IDs into Snuba, which is responsible
        for deduplicating events. Since deduplication in Snuba is on the primary
        key (based on event ID, project ID, and day), events with the same ID are
        only deduplicated if their timestamps fall on the same day. The latest
        event always wins and overwrites the value of events received earlier
        that day.

        Since we increment counters and frequencies here before events get
        inserted into the eventstream, these numbers may be larger than the total
        number of events if we receive duplicate event IDs that fall on the same
        day (and do not hit the cache first).
        """

        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        with metrics.timer("event_manager.save.project.get_from_cache"):
            project = Project.objects.get_from_cache(id=project_id)

        with metrics.timer("event_manager.save.organization.get_from_cache"):
            project._organization_cache = Organization.objects.get_from_cache(
                id=project.organization_id)

        job = {"data": self._data, "project_id": project_id, "raw": raw}
        jobs = [job]
        projects = {project.id: project}

        _pull_out_data(jobs, projects)

        # Right now the event type is the signal to skip the group. This
        # is going to change a lot.
        if job["event"].get_event_type() == "transaction":
            issueless_event = True
        else:
            issueless_event = False

        _get_or_create_release_many(jobs, projects)

        # XXX: remove
        if job["dist"] and job["release"]:
            job["dist"] = job["release"].add_dist(job["dist"],
                                                  job["event"].datetime)
            # don't allow a conflicting 'dist' tag
            pop_tag(job["data"], "dist")
            set_tag(job["data"], "sentry:dist", job["dist"].name)
        else:
            job["dist"] = None

        _get_event_user_many(jobs, projects)

        with metrics.timer("event_manager.load_grouping_config"):
            # At this point we want to normalize the in_app values in case the
            # clients did not set this appropriately so far.
            grouping_config = load_grouping_config(
                get_grouping_config_dict_for_event_data(job["data"], project))

        with metrics.timer("event_manager.normalize_stacktraces_for_grouping"):
            normalize_stacktraces_for_grouping(job["data"], grouping_config)

        _derive_plugin_tags_many(jobs, projects)
        _derive_interface_tags_many(jobs)

        with metrics.timer("event_manager.apply_server_fingerprinting"):
            # The active grouping config was put into the event in the
            # normalize step before.  We now also make sure that the
            # fingerprint was set to `'{{ default }}'` just in case someone
            # removed it from the payload.  The call to get_hashes will then
            # look at `grouping_config` to pick the right parameters.
            job["data"]["fingerprint"] = job["data"].get("fingerprint") or [
                "{{ default }}"
            ]
            apply_server_fingerprinting(
                job["data"], get_fingerprinting_config_for_project(project))

        with metrics.timer("event_manager.event.get_hashes"):
            # Here we try to use the grouping config that was requested in the
            # event.  If that config has since been deleted (because it was an
            # experimental grouping config) we fall back to the default.
            try:
                hashes = job["event"].get_hashes()
            except GroupingConfigNotFound:
                job["data"][
                    "grouping_config"] = get_grouping_config_dict_for_project(
                        project)
                hashes = job["event"].get_hashes()

        job["data"]["hashes"] = hashes

        _materialize_metadata_many(jobs)

        job["received_timestamp"] = received_timestamp = job["event"].data.get(
            "received") or float(job["event"].datetime.strftime("%s"))

        if not issueless_event:
            # The group gets the same metadata as the event when it's flushed but
            # additionally the `last_received` key is set.  This key is used by
            # _save_aggregate.
            group_metadata = dict(job["materialized_metadata"])
            group_metadata["last_received"] = received_timestamp
            kwargs = {
                "platform": job["platform"],
                "message": job["event"].search_message,
                "culprit": job["culprit"],
                "logger": job["logger_name"],
                "level": LOG_LEVELS_MAP.get(job["level"]),
                "last_seen": job["event"].datetime,
                "first_seen": job["event"].datetime,
                "active_at": job["event"].datetime,
                "data": group_metadata,
            }

            if job["release"]:
                kwargs["first_release"] = job["release"]

            try:
                job["group"], job["is_new"], job[
                    "is_regression"] = _save_aggregate(event=job["event"],
                                                       hashes=hashes,
                                                       release=job["release"],
                                                       **kwargs)
            except HashDiscarded:
                event_discarded.send_robust(project=project,
                                            sender=EventManager)

                metrics.incr(
                    "events.discarded",
                    skip_internal=True,
                    tags={
                        "organization_id": project.organization_id,
                        "platform": job["platform"]
                    },
                )
                raise
            job["event"].group = job["group"]
        else:
            job["group"] = None
            job["is_new"] = False
            job["is_regression"] = False

        _send_event_saved_signal_many(jobs, projects)

        # store a reference to the group id to guarantee validation of isolation
        # XXX(markus): No clue what this does
        job["event"].data.bind_ref(job["event"])

        _get_or_create_environment_many(jobs, projects)

        if job["group"]:
            group_environment, job[
                "is_new_group_environment"] = GroupEnvironment.get_or_create(
                    group_id=job["group"].id,
                    environment_id=job["environment"].id,
                    defaults={"first_release": job["release"] or None},
                )
        else:
            job["is_new_group_environment"] = False

        _get_or_create_release_associated_models(jobs, projects)

        if job["release"] and job["group"]:
            job["grouprelease"] = GroupRelease.get_or_create(
                group=job["group"],
                release=job["release"],
                environment=job["environment"],
                datetime=job["event"].datetime,
            )

        _tsdb_record_all_metrics(jobs)

        if job["group"]:
            UserReport.objects.filter(project=project,
                                      event_id=job["event"].event_id).update(
                                          group=job["group"],
                                          environment=job["environment"])

        # Ensure the _metrics key exists. This is usually created during
        # ingestion and prefilled with ingestion sizes.
        event_metrics = job["event"].data.get("_metrics") or {}
        job["event"].data["_metrics"] = event_metrics

        # Capture the actual size that goes into node store.
        event_metrics["bytes.stored.event"] = len(
            json.dumps(dict(job["event"].data.items())))

        if not issueless_event:
            # Load attachments first, but persist them at the very last after
            # posting to eventstream to make sure all counters and eventstream are
            # incremented for sure.
            attachments = get_attachments(cache_key, job["event"])
            for attachment in attachments:
                key = "bytes.stored.%s" % (attachment.type, )
                event_metrics[key] = (event_metrics.get(key) or 0) + len(
                    attachment.data)

        _nodestore_save_many(jobs)

        if job["release"] and not issueless_event:
            if job["is_new"]:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {
                        "release_id": job["release"].id,
                        "project_id": project.id
                    },
                )
            if job["is_new_group_environment"]:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": job["release"].id,
                        "environment_id": job["environment"].id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=job["event"].datetime)
                first_event_received.send_robust(project=project,
                                                 event=job["event"],
                                                 sender=Project)

        _eventstream_insert_many(jobs)

        if not issueless_event:
            # Do this last to ensure signals get emitted even if connection to the
            # file store breaks temporarily.
            save_attachments(attachments, job["event"])

        metric_tags = {"from_relay": "_relay_processed" in job["data"]}

        metrics.timing("events.latency",
                       received_timestamp - job["recorded_timestamp"],
                       tags=metric_tags)
        metrics.timing("events.size.data.post_save",
                       job["event"].size,
                       tags=metric_tags)
        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(job["data"].get("errors") or ()),
            tags=metric_tags,
        )

        self._data = job["event"].data.data
        return job["event"]
Example #7
 def get_grouping_config(self):
     """Returns the event grouping config."""
     from sentry.grouping.api import get_grouping_config_dict_for_project
     return self.data.get('grouping_config') \
         or get_grouping_config_dict_for_project(self.project)
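
A short hedged sketch of how the dict returned by get_grouping_config is typically consumed in the save() paths of Examples 6 and 8: load it into a strategy configuration, normalize stacktraces with it, then compute hashes. The import paths are assumed from a Sentry checkout, and `event` stands for the same event object as above.

from sentry.grouping.api import load_grouping_config  # assumed import path
from sentry.stacktraces.processing import normalize_stacktraces_for_grouping  # assumed

config_dict = event.get_grouping_config()            # event-level config or project default
grouping_config = load_grouping_config(config_dict)  # strategy configuration object
normalize_stacktraces_for_grouping(event.data.data, grouping_config)

# get_hashes() may raise GroupingConfigNotFound if the requested config has
# since been removed; the save() examples above catch that and fall back to
# the project default.
hashes = event.get_hashes()
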
Example #8
    def save(self,
             project_id,
             raw=False,
             assume_normalized=False,
             cache_key=None):
        """
        We re-insert events with duplicate IDs into Snuba, which is responsible
        for deduplicating events. Since deduplication in Snuba is on the primary
        key (based on event ID, project ID, and day), events with the same ID are
        only deduplicated if their timestamps fall on the same day. The latest
        event always wins and overwrites the value of events received earlier
        that day.

        Since we increment counters and frequencies here before events get
        inserted into the eventstream, these numbers may be larger than the total
        number of events if we receive duplicate event IDs that fall on the same
        day (and do not hit the cache first).
        """

        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get("level")

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get("transaction")
        logger_name = data.get("logger")
        release = data.get("release")
        dist = data.get("dist")
        environment = data.get("environment")
        recorded_timestamp = data.get("timestamp")

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Right now the event type is the signal to skip the group. This
        # is going to change a lot.
        if event.get_event_type() == "transaction":
            issueless_event = True
        else:
            issueless_event = False

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, "tags", value=[])
        set_tag(data, "level", level)
        if logger_name:
            set_tag(data, "logger", logger_name)
        if environment:
            set_tag(data, "environment", environment)
        if transaction_name:
            set_tag(data, "transaction", transaction_name)

        if release:
            # don't allow a conflicting 'release' tag
            pop_tag(data, "release")
            release = Release.get_or_create(project=project,
                                            version=release,
                                            date_added=date)
            set_tag(data, "sentry:release", release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # don't allow a conflicting 'dist' tag
            pop_tag(data, "dist")
            set_tag(data, "sentry:dist", dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # don't allow a conflicting 'user' tag
            pop_tag(data, "user")
            set_tag(data, "sentry:user", event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags,
                                      event,
                                      _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to `'{{ default }}'` just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right parameters.
        data["fingerprint"] = data.get("fingerprint") or ["{{ default }}"]
        apply_server_fingerprinting(
            data, get_fingerprinting_config_for_project(project))

        # Here we try to use the grouping config that was requested in the
        # event.  If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = event.get_hashes()
        except GroupingConfigNotFound:
            data["grouping_config"] = get_grouping_config_dict_for_project(
                project)
            hashes = event.get_hashes()

        data["hashes"] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overridden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        data.update(materialized_metadata)
        data["culprit"] = culprit

        received_timestamp = event.data.get("received") or float(
            event.datetime.strftime("%s"))

        if not issueless_event:
            # The group gets the same metadata as the event when it's flushed but
            # additionally the `last_received` key is set.  This key is used by
            # _save_aggregate.
            group_metadata = dict(materialized_metadata)
            group_metadata["last_received"] = received_timestamp
            kwargs = {
                "platform": platform,
                "message": event.search_message,
                "culprit": culprit,
                "logger": logger_name,
                "level": LOG_LEVELS_MAP.get(level),
                "last_seen": date,
                "first_seen": date,
                "active_at": date,
                "data": group_metadata,
            }

            if release:
                kwargs["first_release"] = release

            try:
                group, is_new, is_regression = self._save_aggregate(
                    event=event, hashes=hashes, release=release, **kwargs)
            except HashDiscarded:
                event_discarded.send_robust(project=project,
                                            sender=EventManager)

                metrics.incr(
                    "events.discarded",
                    skip_internal=True,
                    tags={
                        "organization_id": project.organization_id,
                        "platform": platform
                    },
                )
                raise
            else:
                event_saved.send_robust(project=project,
                                        event_size=event.size,
                                        sender=EventManager)
            event.group = group
        else:
            group = None
            is_new = False
            is_regression = False
            event_saved.send_robust(project=project,
                                    event_size=event.size,
                                    sender=EventManager)

        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        environment = Environment.get_or_create(project=project,
                                                name=environment)

        if group:
            group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
                group_id=group.id,
                environment_id=environment.id,
                defaults={"first_release": release if release else None},
            )
        else:
            is_new_group_environment = False

        if release:
            ReleaseEnvironment.get_or_create(project=project,
                                             release=release,
                                             environment=environment,
                                             datetime=date)

            ReleaseProjectEnvironment.get_or_create(project=project,
                                                    release=release,
                                                    environment=environment,
                                                    datetime=date)

            if group:
                grouprelease = GroupRelease.get_or_create(
                    group=group,
                    release=release,
                    environment=environment,
                    datetime=date)

        counters = [(tsdb.models.project, project.id)]

        if group:
            counters.append((tsdb.models.group, group.id))

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters,
                        timestamp=event.datetime,
                        environment_id=environment.id)

        frequencies = []

        if group:
            frequencies.append((tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1
                }
            }))

            if release:
                frequencies.append((tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1
                    }
                }))
        if frequencies:
            tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        if group:
            UserReport.objects.filter(project=project,
                                      event_id=event_id).update(
                                          group=group, environment=environment)

        # Ensure the _metrics key exists. This is usually created during
        # ingestion and prefilled with ingestion sizes.
        event_metrics = event.data.get("_metrics") or {}
        event.data["_metrics"] = event_metrics

        # Capture the actual size that goes into node store.
        event_metrics["bytes.stored.event"] = len(
            json.dumps(dict(event.data.items())))

        # Load attachments first, but persist them at the very last after
        # posting to eventstream to make sure all counters and eventstream are
        # incremented for sure.
        attachments = self.get_attachments(cache_key, event)
        for attachment in attachments:
            key = "bytes.stored.%s" % (attachment.type, )
            event_metrics[key] = (event_metrics.get(key) or 0) + len(
                attachment.data)

        # Write the event to Nodestore
        event.data.save()

        if event_user:
            counters = [(tsdb.models.users_affected_by_project, project.id,
                         (event_user.tag_value, ))]

            if group:
                counters.append((tsdb.models.users_affected_by_group, group.id,
                                 (event_user.tag_value, )))

            tsdb.record_multi(counters,
                              timestamp=event.datetime,
                              environment_id=environment.id)

        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {
                        "release_id": release.id,
                        "project_id": project.id
                    },
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": release.id,
                        "environment_id": environment.id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project,
                                                 event=event,
                                                 sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        # Do this last to ensure signals get emitted even if connection to the
        # file store breaks temporarily.
        self.save_attachments(attachments, event)

        metric_tags = {"from_relay": "_relay_processed" in self._data}

        metrics.timing("events.latency",
                       received_timestamp - recorded_timestamp,
                       tags=metric_tags)
        metrics.timing("events.size.data.post_save",
                       event.size,
                       tags=metric_tags)
        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(self._data.get("errors") or ()),
            tags=metric_tags,
        )

        return event
Example #9
    def save(self, project_id, raw=False, assume_normalized=False):
        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            # Make sure we cache on the project before returning
            event._project_cache = project
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': data['event_id'],
                    'project_id': project.id,
                    'model': Event.__name__,
                }
            )
            return event

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get('level')

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get('transaction')
        logger_name = data.get('logger')
        release = data.get('release')
        dist = data.get('dist')
        environment = data.get('environment')
        recorded_timestamp = data.get('timestamp')

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, 'tags', value=[])
        set_tag(data, 'level', level)
        if logger_name:
            set_tag(data, 'logger', logger_name)
        if environment:
            set_tag(data, 'environment', environment)
        if transaction_name:
            set_tag(data, 'transaction', transaction_name)

        if release:
            # don't allow a conflicting 'release' tag
            pop_tag(data, 'release')
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )
            set_tag(data, 'sentry:release', release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # don't allow a conflicting 'dist' tag
            pop_tag(data, 'dist')
            set_tag(data, 'sentry:dist', dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # don't allow a conflicting 'user' tag
            pop_tag(data, 'user')
            set_tag(data, 'sentry:user', event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to `'{{ default }}'` just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right parameters.
        data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
        apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))

        # Here we try to use the grouping config that was requested in the
        # event.  If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = event.get_hashes()
        except GroupingConfigNotFound:
            data['grouping_config'] = get_grouping_config_dict_for_project(project)
            hashes = event.get_hashes()

        data['hashes'] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overridden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        event_metadata = materialized_metadata['metadata']
        data.update(materialized_metadata)
        data['culprit'] = culprit

        # index components into ``Event.message``
        # See GH-3248
        event.message = self.get_search_message(event_metadata, culprit)
        received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(materialized_metadata)
        group_metadata['last_received'] = received_timestamp
        kwargs = {
            'platform': platform,
            'message': event.message,
            'culprit': culprit,
            'logger': logger_name,
            'level': LOG_LEVELS_MAP.get(level),
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': group_metadata,
        }

        if release:
            kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event is sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project, group=group, event_id=event_id)
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': EventMapping.__name__,
                    }
                )
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release': release if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # Update any event attachment that arrived before the event group was defined.
        EventAttachment.objects.filter(
            project_id=project.id,
            event_id=event_id,
        ).update(
            group_id=group.id,
        )

        # save the event unless it's been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            tagstore.delay_index_event_tags(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=event.tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject, {'new_groups': 1}, {
                        'release_id': release.id,
                        'project_id': project.id,
                    }
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                        'project_id': project.id,
                        'release_id': release.id,
                        'environment_id': environment.id,
                    }
                )

        safe_execute(
            Group.objects.add_tags,
            group,
            environment,
            event.get_tags(),
            _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, group=group, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        metrics.timing(
            'events.size.data.post_save',
            event.size,
            tags={'project_id': project.id}
        )

        return event
Example #10
def get_project_config(project_id, full_config=True, for_store=False):
    """
    Constructs the ProjectConfig information.

    :param project_id: the project id as int or string
    :param full_config: True if the full config is required, False if only
        the restricted config (for external relays) is required
        (default True, i.e. full configuration)
    :param for_store: If set to true, this omits all parameters that are not
        needed for store normalization. This is a temporary flag that should
        be removed once store has been moved to Relay. Most importantly, this
        avoids database accesses.

    :return: a ProjectConfig object for the given project
    """
    project = _get_project_from_id(six.text_type(project_id))

    if project is None:
        raise APIError("Invalid project id: {}".format(project_id))

    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    if for_store:
        project_keys = []
    else:
        project_keys = ProjectKey.objects \
            .filter(project=project) \
            .all()

    public_keys = {}
    for project_key in project_keys:
        public_keys[project_key.public_key] = project_key.status == 0

    now = datetime.utcnow().replace(tzinfo=utc)

    org_options = OrganizationOption.objects.get_all_values(
        project.organization_id)

    cfg = {
        'disabled': project.status > 0,
        'slug': project.slug,
        'lastFetch': now,
        'lastChange': project.get_option('sentry:relay-rev-lastchange', now),
        'rev': project.get_option('sentry:relay-rev', uuid.uuid4().hex),
        'publicKeys': public_keys,
        'config': {
            'allowedDomains': project.get_option('sentry:origins', ['*']),
            'trustedRelays': org_options.get('sentry:trusted-relays', []),
            'piiConfig': _get_pii_config(project, org_options),
        },
        'project_id': project.id,
    }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    # The organization id is only required for reporting when processing events
    # internally. Do not expose it to external Relays.
    cfg['organization_id'] = project.organization_id

    # Explicitly bind Organization so we don't implicitly query it later.
    # This just allows us to comfortably assure that `project.organization`
    # is safe. It also allows us to pull the object from cache instead of
    # implicitly fetching it from the database.
    project.organization = Organization.objects.get_from_cache(
        id=project.organization_id)

    if project.organization is not None:
        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)
    else:
        org_options = {}

    project_cfg = cfg['config']

    # get the filter settings for this project
    filter_settings = {}
    project_cfg['filter_settings'] = filter_settings

    for flt in get_all_filters():
        filter_id = get_filter_key(flt)
        settings = _load_filter_settings(flt, project)
        filter_settings[filter_id] = settings

    invalid_releases = project.get_option(u'sentry:{}'.format(FilterTypes.RELEASES))
    if invalid_releases:
        filter_settings[FilterTypes.RELEASES] = {'releases': invalid_releases}

    blacklisted_ips = project.get_option('sentry:blacklisted_ips')
    if blacklisted_ips:
        filter_settings['client_ips'] = {'blacklisted_ips': blacklisted_ips}

    error_messages = project.get_option(u'sentry:{}'.format(FilterTypes.ERROR_MESSAGES))
    if error_messages:
        filter_settings[FilterTypes.ERROR_MESSAGES] = {'patterns': error_messages}

    csp_disallowed_sources = []
    if bool(project.get_option('sentry:csp_ignored_sources_defaults', True)):
        csp_disallowed_sources += DEFAULT_DISALLOWED_SOURCES
    csp_disallowed_sources += project.get_option('sentry:csp_ignored_sources', [])
    if csp_disallowed_sources:
        filter_settings['csp'] = {'disallowed_sources': csp_disallowed_sources}

    scrub_ip_address = (org_options.get('sentry:require_scrub_ip_address', False) or
                        project.get_option('sentry:scrub_ip_address', False))

    project_cfg['scrub_ip_addresses'] = scrub_ip_address

    scrub_data = (org_options.get('sentry:require_scrub_data', False) or
                  project.get_option('sentry:scrub_data', True))

    project_cfg['scrub_data'] = scrub_data
    project_cfg['grouping_config'] = get_grouping_config_dict_for_project(project)
    project_cfg['allowed_domains'] = list(get_origins(project))

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (
            org_options.get(sensitive_fields_key, []) +
            project.get_option(sensitive_fields_key, [])
        )
        project_cfg['sensitive_fields'] = sensitive_fields

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (
            org_options.get(exclude_fields_key, []) +
            project.get_option(exclude_fields_key, [])
        )
        project_cfg['exclude_fields'] = exclude_fields

        scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or
                          project.get_option('sentry:scrub_defaults', True))
        project_cfg['scrub_defaults'] = scrub_defaults

    return ProjectConfig(project, **cfg)
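
The data-scrubbing flags in the legacy builder above all follow the same resolution pattern: an organization-level "require_*" option forces the behaviour on, otherwise the project-level option (with its own default) decides. Below is a standalone sketch of that pattern with plain dictionaries standing in for org_options and the project's options; the resolve_flag helper is illustrative only.

def resolve_flag(org_options, project_options, org_key, project_key, project_default):
    # org-level "require_*" option wins; otherwise fall back to the project option
    return bool(
        org_options.get(org_key, False)
        or project_options.get(project_key, project_default)
    )


org_options = {"sentry:require_scrub_ip_address": True}
project_options = {"sentry:scrub_ip_address": False}

# The organization requirement overrides the project-level opt-out.
assert resolve_flag(
    org_options, project_options,
    "sentry:require_scrub_ip_address", "sentry:scrub_ip_address", False,
) is True

# Without the org requirement, scrub_data defaults to True at the project level.
assert resolve_flag({}, {}, "sentry:require_scrub_data", "sentry:scrub_data", True) is True
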
Example #11
def get_project_config(project,
                       org_options=None,
                       full_config=True,
                       for_store=False):
    """
    Constructs the ProjectConfig information.

    :param project: The project to load configuration for. Ensure that
        organization is bound on this object; otherwise it will be loaded from
        the database.
    :param org_options: Inject preloaded organization options for faster loading.
        If ``None``, options are lazy-loaded from the database.
    :param full_config: True if the full config is required, False if only
        the restricted config (for external relays) is required
        (default True, i.e. full configuration)
    :param for_store: If set to true, this omits all parameters that are not
        needed for Relay. This is a temporary flag that should be removed once
        store has been moved to Relay. Most importantly, this avoids database
        accesses.

    :return: a ProjectConfig object for the given project
    """
    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    if for_store:
        project_keys = []
    else:
        project_keys = ProjectKey.objects.filter(project=project).all()

    public_keys = []

    for project_key in project_keys:
        key = {
            "publicKey": project_key.public_key,
            "isEnabled": project_key.status == 0
        }
        if full_config:
            key["numericId"] = project_key.id

            key["quotas"] = [
                quota.to_json()
                for quota in quotas.get_quotas(project, key=project_key)
            ]
        public_keys.append(key)

    now = datetime.utcnow().replace(tzinfo=utc)

    if org_options is None:
        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)

    cfg = {
        "disabled": project.status > 0,
        "slug": project.slug,
        "lastFetch": now,
        "lastChange": project.get_option("sentry:relay-rev-lastchange", now),
        "rev": project.get_option("sentry:relay-rev",
                                  uuid.uuid4().hex),
        "publicKeys": public_keys,
        "config": {
            "allowedDomains":
            project.get_option("sentry:origins", ["*"]),
            "trustedRelays":
            org_options.get("sentry:trusted-relays", []),
            "piiConfig":
            _get_pii_config(project),
            "datascrubbingSettings":
            _get_datascrubbing_settings(project, org_options),
        },
        "project_id": project.id,
    }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    # The organization id is only required for reporting when processing events
    # internally. Do not expose it to external Relays.
    cfg["organization_id"] = project.organization_id

    project_cfg = cfg["config"]

    # get the filter settings for this project
    filter_settings = {}
    project_cfg["filter_settings"] = filter_settings

    for flt in get_all_filters():
        filter_id = get_filter_key(flt)
        settings = _load_filter_settings(flt, project)
        filter_settings[filter_id] = settings

    invalid_releases = project.get_option(u"sentry:{}".format(
        FilterTypes.RELEASES))
    if invalid_releases:
        filter_settings[FilterTypes.RELEASES] = {"releases": invalid_releases}

    blacklisted_ips = project.get_option("sentry:blacklisted_ips")
    if blacklisted_ips:
        filter_settings["client_ips"] = {"blacklisted_ips": blacklisted_ips}

    error_messages = project.get_option(u"sentry:{}".format(
        FilterTypes.ERROR_MESSAGES))
    if error_messages:
        filter_settings[FilterTypes.ERROR_MESSAGES] = {
            "patterns": error_messages
        }

    csp_disallowed_sources = []
    if bool(project.get_option("sentry:csp_ignored_sources_defaults", True)):
        csp_disallowed_sources += DEFAULT_DISALLOWED_SOURCES
    csp_disallowed_sources += project.get_option("sentry:csp_ignored_sources",
                                                 [])
    if csp_disallowed_sources:
        filter_settings["csp"] = {"disallowed_sources": csp_disallowed_sources}

    scrub_ip_address = org_options.get("sentry:require_scrub_ip_address",
                                       False) or project.get_option(
                                           "sentry:scrub_ip_address", False)

    project_cfg["scrub_ip_addresses"] = scrub_ip_address

    project_cfg["grouping_config"] = get_grouping_config_dict_for_project(
        project)
    project_cfg["allowed_domains"] = list(get_origins(project))

    return ProjectConfig(project, **cfg)
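A standalone sketch of the publicKeys list assembled in the loop above, with a hypothetical named tuple standing in for ProjectKey rows (the quotas lookup is omitted because it needs the quota service; in this sketch status 0 means the key is active):

from collections import namedtuple

# Hypothetical stand-in carrying only the fields the loop above reads.
FakeKey = namedtuple('FakeKey', ['public_key', 'status', 'id'])

project_keys = [FakeKey('aaaa', 0, 1), FakeKey('bbbb', 1, 2)]
full_config = True

public_keys = []
for project_key in project_keys:
    key = {
        'publicKey': project_key.public_key,
        'isEnabled': project_key.status == 0,
    }
    if full_config:
        key['numericId'] = project_key.id
        # The real code also attaches key['quotas'] here; omitted in this
        # sketch because it requires the quota service.
    public_keys.append(key)

assert public_keys == [
    {'publicKey': 'aaaa', 'isEnabled': True, 'numericId': 1},
    {'publicKey': 'bbbb', 'isEnabled': False, 'numericId': 2},
]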
Ejemplo n.º 12
0
def get_full_relay_config(project_id):
    """
    Constructs the internal (big) RelayConfig

    :param project_id: the project id as int or string
    :return: FullRelayConfig the relay configuration
    """

    cfg = {}
    project = _get_project_from_id(six.text_type(project_id))

    if project is None:
        raise APIError("Invalid project id:{}".format(project_id))

    cfg['project_id'] = project.id
    cfg['organization_id'] = project.organization_id

    # Explicitly bind the Organization so we don't implicitly query it later;
    # this lets us safely assume that `project.organization` is set. It also
    # allows us to pull the object from cache instead of implicitly fetching
    # it from the database.
    project.organization = Organization.objects.get_from_cache(
        id=project.organization_id)

    if project.organization is not None:
        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)
    else:
        org_options = {}

    # get the project options
    project_cfg = {}
    cfg['config'] = project_cfg

    # get the Kafka publisher options
    try:
        project_cfg['kafka_max_event_size'] = options.get(
            'kafka-publisher.max-event-size')
        project_cfg['kafka_raw_event_sample_rate'] = options.get(
            'kafka-publisher.raw-event-sample-rate')
    except Exception:
        pass  # option lookup failures are non-fatal here; should we log?

    invalid_releases = project.get_option(u'sentry:{}'.format(
        FilterTypes.RELEASES))
    if invalid_releases is not None:
        project_cfg['invalid_releases'] = invalid_releases

    # get the filters enabled for the current project
    enabled_filters = [
        filter_class.id for filter_class in filters.all()
        if filter_class(project).is_enabled()
    ]

    project_cfg['enabled_filters'] = enabled_filters

    scrub_ip_address = (
        org_options.get('sentry:require_scrub_ip_address', False)
        or project.get_option('sentry:scrub_ip_address', False))

    project_cfg['scrub_ip_addresses'] = scrub_ip_address

    scrub_data = (org_options.get('sentry:require_scrub_data', False)
                  or project.get_option('sentry:scrub_data', True))

    project_cfg['scrub_data'] = scrub_data
    project_cfg['grouping_config'] = get_grouping_config_dict_for_project(
        project)
    project_cfg['allowed_domains'] = list(get_origins(project))

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (org_options.get(sensitive_fields_key, []) +
                            project.get_option(sensitive_fields_key, []))
        project_cfg['sensitive_fields'] = sensitive_fields

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (org_options.get(exclude_fields_key, []) +
                          project.get_option(exclude_fields_key, []))
        project_cfg['exclude_fields'] = exclude_fields

        scrub_defaults = (org_options.get('sentry:require_scrub_defaults',
                                          False)
                          or project.get_option('sentry:scrub_defaults', True))
        project_cfg['scrub_defaults'] = scrub_defaults

    return FullRelayConfig(project, **cfg)
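A standalone sketch of how enabled_filters is derived above, with hypothetical filter classes standing in for filters.all(); each stand-in only mimics the id attribute and is_enabled() method the snippet relies on:

# Hypothetical filter classes; only id and is_enabled() are modeled.
class BrowserExtensionsFilter:
    id = 'browser-extensions'

    def __init__(self, project):
        self.project = project

    def is_enabled(self):
        return True


class LegacyBrowsersFilter:
    id = 'legacy-browsers'

    def __init__(self, project):
        self.project = project

    def is_enabled(self):
        return False


all_filters = [BrowserExtensionsFilter, LegacyBrowsersFilter]
project = object()  # placeholder project

enabled_filters = [
    filter_class.id for filter_class in all_filters
    if filter_class(project).is_enabled()
]

assert enabled_filters == ['browser-extensions']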
Ejemplo n.º 13
0
def get_project_config(project_id, full_config=True, for_store=False):
    """
    Constructs the ProjectConfig information.

    :param project_id: the project id as int or string
    :param full_config: True if the full config is required, False if only
        the restricted config (for external Relays) is required
        (default True, i.e. full configuration).
    :param for_store: If set to True, this omits all parameters that are not
        needed for store normalization. This is a temporary flag that should
        be removed once store has been moved to Relay. Most importantly, this
        avoids database accesses.

    :return: a ProjectConfig object for the given project
    """
    project = _get_project_from_id(six.text_type(project_id))

    if project is None:
        raise APIError("Invalid project id:{}".format(project_id))

    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    if for_store:
        project_keys = []
    else:
        project_keys = ProjectKey.objects.filter(project=project).all()

    public_keys = {}
    for project_key in project_keys:
        public_keys[project_key.public_key] = project_key.status == 0

    now = datetime.utcnow().replace(tzinfo=utc)

    org_options = OrganizationOption.objects.get_all_values(
        project.organization_id)

    cfg = {
        "disabled": project.status > 0,
        "slug": project.slug,
        "lastFetch": now,
        "lastChange": project.get_option("sentry:relay-rev-lastchange", now),
        "rev": project.get_option("sentry:relay-rev",
                                  uuid.uuid4().hex),
        "publicKeys": public_keys,
        "config": {
            "allowedDomains":
            project.get_option("sentry:origins", ["*"]),
            "trustedRelays":
            org_options.get("sentry:trusted-relays", []),
            "piiConfig":
            _get_pii_config(project),
            "datascrubbingSettings":
            _get_datascrubbing_settings(project, org_options),
        },
        "project_id": project.id,
    }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    # The organization id is only required for reporting when processing events
    # internally. Do not expose it to external Relays.
    cfg["organization_id"] = project.organization_id

    # Explicitly bind the Organization so we don't implicitly query it later;
    # this lets us safely assume that `project.organization` is set. It also
    # allows us to pull the object from cache instead of implicitly fetching
    # it from the database.
    project.organization = Organization.objects.get_from_cache(
        id=project.organization_id)

    if project.organization is not None:
        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)
    else:
        org_options = {}

    project_cfg = cfg["config"]

    # get the filter settings for this project
    filter_settings = {}
    project_cfg["filter_settings"] = filter_settings

    for flt in get_all_filters():
        filter_id = get_filter_key(flt)
        settings = _load_filter_settings(flt, project)
        filter_settings[filter_id] = settings

    invalid_releases = project.get_option(u"sentry:{}".format(
        FilterTypes.RELEASES))
    if invalid_releases:
        filter_settings[FilterTypes.RELEASES] = {"releases": invalid_releases}

    blacklisted_ips = project.get_option("sentry:blacklisted_ips")
    if blacklisted_ips:
        filter_settings["client_ips"] = {"blacklisted_ips": blacklisted_ips}

    error_messages = project.get_option(u"sentry:{}".format(
        FilterTypes.ERROR_MESSAGES))
    if error_messages:
        filter_settings[FilterTypes.ERROR_MESSAGES] = {
            "patterns": error_messages
        }

    csp_disallowed_sources = []
    if bool(project.get_option("sentry:csp_ignored_sources_defaults", True)):
        csp_disallowed_sources += DEFAULT_DISALLOWED_SOURCES
    csp_disallowed_sources += project.get_option("sentry:csp_ignored_sources",
                                                 [])
    if csp_disallowed_sources:
        filter_settings["csp"] = {"disallowed_sources": csp_disallowed_sources}

    scrub_ip_address = org_options.get("sentry:require_scrub_ip_address",
                                       False) or project.get_option(
                                           "sentry:scrub_ip_address", False)

    project_cfg["scrub_ip_addresses"] = scrub_ip_address

    project_cfg["grouping_config"] = get_grouping_config_dict_for_project(
        project)
    project_cfg["allowed_domains"] = list(get_origins(project))

    return ProjectConfig(project, **cfg)
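Unlike example 11, this variant exposes publicKeys as a plain mapping from public key to an enabled flag rather than a list of key descriptors. A standalone sketch with hypothetical values:

# Hypothetical (public_key, status) pairs; status 0 means the key is active.
project_keys = [('aaaa', 0), ('bbbb', 1)]

public_keys = {public_key: status == 0 for public_key, status in project_keys}

assert public_keys == {'aaaa': True, 'bbbb': False}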
Ejemplo n.º 14
0
def _project_has_hierarchical_grouping(project):
    config_dict = get_grouping_config_dict_for_project(project)
    config = load_grouping_config(config_dict)
    return config.initial_context["hierarchical_grouping"]
Ejemplo n.º 15
0
def get_project_config(project,
                       org_options=None,
                       full_config=True,
                       project_keys=None):
    """
    Constructs the ProjectConfig information.

    :param project: The project to load configuration for. Ensure that
        organization is bound on this object; otherwise it will be loaded from
        the database.
    :param org_options: Inject preloaded organization options for faster loading.
        If ``None``, options are lazy-loaded from the database.
    :param full_config: True if the full config is required, False if only
        the restricted config (for external Relays) is required
        (default True, i.e. full configuration).
    :param project_keys: Pre-fetched project keys for performance, similar to
        org_options. However, if no project keys are provided it is assumed
        that the config does not need to contain auth information (this is the
        case when used in Python's StoreView).

    :return: a ProjectConfig object for the given project
    """
    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    public_keys = []

    for project_key in project_keys or ():
        key = {
            "publicKey": project_key.public_key,
            "isEnabled": project_key.status == 0
        }
        if full_config:
            key["numericId"] = project_key.id

            key["quotas"] = [
                quota.to_json()
                for quota in quotas.get_quotas(project, key=project_key)
            ]
        public_keys.append(key)

    now = datetime.utcnow().replace(tzinfo=utc)

    if org_options is None:
        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)

    with Hub.current.start_span(op="get_public_config"):

        cfg = {
            "disabled": project.status > 0,
            "slug": project.slug,
            "lastFetch": now,
            "lastChange": project.get_option("sentry:relay-rev-lastchange",
                                             now),
            "rev": project.get_option("sentry:relay-rev",
                                      uuid.uuid4().hex),
            "publicKeys": public_keys,
            "config": {
                "allowedDomains":
                list(get_origins(project)),
                "trustedRelays":
                org_options.get("sentry:trusted-relays", []),
                "piiConfig":
                _get_pii_config(project),
                "datascrubbingSettings":
                _get_datascrubbing_settings(project, org_options),
            },
            "projectId": project.id,
        }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    # The organization id is only required for reporting when processing events
    # internally. Do not expose it to external Relays.
    cfg["organizationId"] = project.organization_id

    project_cfg = cfg["config"]

    with Hub.current.start_span(op="get_filter_settings"):
        # get the filter settings for this project
        filter_settings = {}
        project_cfg["filterSettings"] = filter_settings

        for flt in get_all_filters():
            filter_id = get_filter_key(flt)
            settings = _load_filter_settings(flt, project)
            filter_settings[filter_id] = settings

        invalid_releases = project.get_option(u"sentry:{}".format(
            FilterTypes.RELEASES))
        if invalid_releases:
            filter_settings["releases"] = {"releases": invalid_releases}

        blacklisted_ips = project.get_option("sentry:blacklisted_ips")
        if blacklisted_ips:
            filter_settings["clientIps"] = {"blacklistedIps": blacklisted_ips}

        error_messages = project.get_option(u"sentry:{}".format(
            FilterTypes.ERROR_MESSAGES))
        if error_messages:
            filter_settings["errorMessages"] = {"patterns": error_messages}

        csp_disallowed_sources = []
        if bool(project.get_option("sentry:csp_ignored_sources_defaults",
                                   True)):
            csp_disallowed_sources += DEFAULT_DISALLOWED_SOURCES
        csp_disallowed_sources += project.get_option(
            "sentry:csp_ignored_sources", [])
        if csp_disallowed_sources:
            filter_settings["csp"] = {
                "disallowedSources": csp_disallowed_sources
            }

    with Hub.current.start_span(op="get_grouping_config_dict_for_project"):
        project_cfg["groupingConfig"] = get_grouping_config_dict_for_project(
            project)

    return ProjectConfig(project, **cfg)
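For orientation, a sketch of the rough shape this variant returns in its cfg dictionary, with hypothetical, abbreviated values; note the camelCase keys (filterSettings, groupingConfig, projectId) in contrast to the snake_case keys used by the earlier variants:

# Hypothetical, abbreviated shape of the config produced above; timestamps
# and revision fields are omitted, and all values are illustrative only.
cfg = {
    'disabled': False,
    'slug': 'my-project',
    'publicKeys': [{'publicKey': 'aaaa', 'isEnabled': True, 'numericId': 1}],
    'projectId': 42,
    'organizationId': 7,
    'config': {
        'allowedDomains': ['*'],
        'trustedRelays': [],
        'piiConfig': None,
        'datascrubbingSettings': {},
        'filterSettings': {
            'releases': {'releases': ['1.0.0']},
            'clientIps': {'blacklistedIps': ['127.0.0.1']},
            'errorMessages': {'patterns': ['ConnectionError*']},
            'csp': {'disallowedSources': ['data:']},
        },
        'groupingConfig': {'id': 'newstyle:2019-05-08'},
    },
}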
Ejemplo n.º 16
0
    def save(self, project_id, raw=False, assume_normalized=False, start_time=None, cache_key=None):
        """
        After normalizing and processing an event, save adjacent models such as
        releases and environments to postgres and write the event into
        eventstream. From there it will be picked up by Snuba and
        post-processing.

        We re-insert events with duplicate IDs into Snuba, which is responsible
        for deduplicating events. Since deduplication in Snuba is based on the
        primary key (event ID, project ID and day), events with the same ID are
        only deduplicated if their timestamps fall on the same day. The latest
        event always wins and overwrites the values of events received earlier
        that day.

        Since we increment counters and frequencies here before events get
        inserted into the eventstream, these numbers may be larger than the
        total number of events if we receive duplicate event IDs that fall on
        the same day (and that do not hit the cache first).
        """

        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize(project_id=project_id)
            self._normalized = True

        with metrics.timer("event_manager.save.project.get_from_cache"):
            project = Project.objects.get_from_cache(id=project_id)

        projects = {project.id: project}

        if self._data.get("type") == "transaction":
            self._data["project"] = int(project_id)
            job = {"data": self._data, "start_time": start_time}
            jobs = save_transaction_events([job], projects)
            return jobs[0]["event"]

        with metrics.timer("event_manager.save.organization.get_from_cache"):
            project._organization_cache = Organization.objects.get_from_cache(
                id=project.organization_id
            )

        job = {"data": self._data, "project_id": project_id, "raw": raw, "start_time": start_time}
        jobs = [job]

        _pull_out_data(jobs, projects)
        _get_or_create_release_many(jobs, projects)
        _get_event_user_many(jobs, projects)

        job["project_key"] = None
        if job["key_id"] is not None:
            with metrics.timer("event_manager.load_project_key"):
                try:
                    job["project_key"] = ProjectKey.objects.get_from_cache(id=job["key_id"])
                except ProjectKey.DoesNotExist:
                    pass

        with metrics.timer("event_manager.load_grouping_config"):
            # At this point we want to normalize the in_app values in case the
            # clients did not set them appropriately so far.
            grouping_config = load_grouping_config(
                get_grouping_config_dict_for_event_data(job["data"], project)
            )

        with metrics.timer("event_manager.normalize_stacktraces_for_grouping"):
            normalize_stacktraces_for_grouping(job["data"], grouping_config)

        _derive_plugin_tags_many(jobs, projects)
        _derive_interface_tags_many(jobs)

        with metrics.timer("event_manager.apply_server_fingerprinting"):
            # The active grouping config was put into the event in the
            # normalize step earlier.  We now also make sure that the
            # fingerprint is set to `'{{ default }}'` just in case someone
            # removed it from the payload.  The call to get_hashes will then
            # look at `grouping_config` to pick the right parameters.
            job["data"]["fingerprint"] = job["data"].get("fingerprint") or ["{{ default }}"]
            apply_server_fingerprinting(job["data"], get_fingerprinting_config_for_project(project))

        with metrics.timer("event_manager.event.get_hashes"):
            # Here we try to use the grouping config that was requested in the
            # event.  If that config has since been deleted (because it was an
            # experimental grouping config) we fall back to the default.
            try:
                hashes = job["event"].get_hashes()
            except GroupingConfigNotFound:
                job["data"]["grouping_config"] = get_grouping_config_dict_for_project(project)
                hashes = job["event"].get_hashes()

        job["data"]["hashes"] = hashes

        _materialize_metadata_many(jobs)

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(job["materialized_metadata"])
        group_metadata["last_received"] = job["received_timestamp"]
        kwargs = {
            "platform": job["platform"],
            "message": job["event"].search_message,
            "culprit": job["culprit"],
            "logger": job["logger_name"],
            "level": LOG_LEVELS_MAP.get(job["level"]),
            "last_seen": job["event"].datetime,
            "first_seen": job["event"].datetime,
            "active_at": job["event"].datetime,
            "data": group_metadata,
        }

        if job["release"]:
            kwargs["first_release"] = job["release"]

        # Load attachments first, but only persist them at the very end, after
        # posting to the eventstream, to make sure all counters and the
        # eventstream are reliably incremented. Also wait for grouping so that
        # attachments can be removed based on the group counter.
        with metrics.timer("event_manager.get_attachments"):
            attachments = get_attachments(cache_key, job)

        try:
            job["group"], job["is_new"], job["is_regression"] = _save_aggregate(
                event=job["event"], hashes=hashes, release=job["release"], **kwargs
            )
        except HashDiscarded:
            discard_event(job, attachments)
            raise

        job["event"].group = job["group"]

        # store a reference to the group id to guarantee validation of isolation
        # XXX(markus): No clue what this does
        job["event"].data.bind_ref(job["event"])

        _get_or_create_environment_many(jobs, projects)

        if job["group"]:
            group_environment, job["is_new_group_environment"] = GroupEnvironment.get_or_create(
                group_id=job["group"].id,
                environment_id=job["environment"].id,
                defaults={"first_release": job["release"] or None},
            )
        else:
            job["is_new_group_environment"] = False

        _get_or_create_release_associated_models(jobs, projects)

        if job["release"] and job["group"]:
            job["grouprelease"] = GroupRelease.get_or_create(
                group=job["group"],
                release=job["release"],
                environment=job["environment"],
                datetime=job["event"].datetime,
            )

        _tsdb_record_all_metrics(jobs)

        if job["group"]:
            UserReport.objects.filter(project=project, event_id=job["event"].event_id).update(
                group=job["group"], environment=job["environment"]
            )

        with metrics.timer("event_manager.filter_attachments_for_group"):
            attachments = filter_attachments_for_group(attachments, job)

        # XXX: DO NOT MUTATE THE EVENT PAYLOAD AFTER THIS POINT
        _materialize_event_metrics(jobs)

        for attachment in attachments:
            key = "bytes.stored.%s" % (attachment.type,)
            old_bytes = job["event_metrics"].get(key) or 0
            job["event_metrics"][key] = old_bytes + attachment.size

        _nodestore_save_many(jobs)
        save_unprocessed_event(project, event_id=job["event"].event_id)

        if job["release"]:
            if job["is_new"]:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {"release_id": job["release"].id, "project_id": project.id},
                )
            if job["is_new_group_environment"]:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": job["release"].id,
                        "environment_id": job["environment"].id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=job["event"].datetime)
                first_event_received.send_robust(
                    project=project, event=job["event"], sender=Project
                )

        _eventstream_insert_many(jobs)

        # Do this last to ensure signals get emitted even if connection to the
        # file store breaks temporarily.
        with metrics.timer("event_manager.save_attachments"):
            save_attachments(cache_key, attachments, job)

        metric_tags = {"from_relay": "_relay_processed" in job["data"]}

        metrics.timing(
            "events.latency",
            job["received_timestamp"] - job["recorded_timestamp"],
            tags=metric_tags,
        )
        metrics.timing("events.size.data.post_save", job["event"].size, tags=metric_tags)
        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(job["data"].get("errors") or ()),
            tags=metric_tags,
        )

        _track_outcome_accepted_many(jobs)

        self._data = job["event"].data.data
        return job["event"]