Example #1
def test_project_config_with_span_attributes(default_project, insta_snapshot):
    # The span attributes config is not set with the flag turned off
    cfg = get_project_config(default_project, full_config=True)
    cfg = cfg.to_dict()
    assert "spanAttributes" not in cfg["config"]

    with Feature("projects:performance-suspect-spans-ingestion"):
        cfg = get_project_config(default_project, full_config=True)

    cfg = cfg.to_dict()
    insta_snapshot(cfg["config"]["spanAttributes"])
Example #2
    def test_should_filter_message(self, mock_is_valid_error_message):
        TestItem = namedtuple("TestItem", "value formatted result")

        items = [
            TestItem({"type": "UnfilteredException"}, "UnfilteredException", True),
            TestItem(
                {"value": "This is an unfiltered exception."},
                "This is an unfiltered exception.",
                True,
            ),
            TestItem(
                {"type": "UnfilteredException", "value": "This is an unfiltered exception."},
                "UnfilteredException: This is an unfiltered exception.",
                True,
            ),
            TestItem(
                {"type": "FilteredException", "value": "This is a filtered exception."},
                "FilteredException: This is a filtered exception.",
                False,
            ),
        ]

        data = {"exception": {"values": [item.value for item in items]}}

        project_config = get_project_config(self.project)
        manager = EventManager(data, project=self.project, project_config=project_config)

        mock_is_valid_error_message.side_effect = [item.result for item in items]

        assert manager.should_filter() == (True, FilterStatKeys.ERROR_MESSAGE)

        assert mock_is_valid_error_message.call_args_list == [
            mock.call(project_config, item.formatted) for item in items
        ]
Example #3
def test_project_config_satisfaction_thresholds(
    default_project,
    insta_snapshot,
    has_project_transaction_threshold_overrides,
    has_project_transaction_threshold,
):
    if has_project_transaction_threshold:
        default_project.projecttransactionthreshold_set.create(
            organization=default_project.organization,
            threshold=500,
            metric=TransactionMetric.LCP.value,
        )
    if has_project_transaction_threshold_overrides:
        default_project.projecttransactionthresholdoverride_set.create(
            organization=default_project.organization,
            transaction="foo",
            threshold=400,
            metric=TransactionMetric.DURATION.value,
        )
        default_project.projecttransactionthresholdoverride_set.create(
            organization=default_project.organization,
            transaction="bar",
            threshold=600,
            metric=TransactionMetric.LCP.value,
        )
    with Feature(
        {
            "organizations:transaction-metrics-extraction": True,
        }
    ):
        cfg = get_project_config(default_project, full_config=True)

    cfg = cfg.to_dict()
    insta_snapshot(cfg["config"]["transactionMetrics"]["satisfactionThresholds"])
Example #4
def test_get_project_config(default_project, insta_snapshot, full,
                            has_ops_breakdown):
    # We could use the default_project fixture here, but we would like to avoid 1) hitting the db 2) creating a mock
    default_project.update_option("sentry:relay_pii_config", PII_CONFIG)
    default_project.organization.update_option("sentry:relay_pii_config",
                                               PII_CONFIG)
    keys = ProjectKey.objects.filter(project=default_project)

    with Feature(
        {"organizations:performance-ops-breakdown": has_ops_breakdown}):
        cfg = get_project_config(default_project,
                                 full_config=full,
                                 project_keys=keys)
        cfg = cfg.to_dict()

        # Remove keys that change every time
        cfg.pop("lastChange")
        cfg.pop("lastFetch")
        cfg.pop("rev")

        # public keys change every time
        assert cfg.pop("projectId") == default_project.id
        assert len(cfg.pop("publicKeys")) == len(keys)
        assert cfg.pop("organizationId") == default_project.organization.id

        insta_snapshot(cfg)
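
The keys popped and asserted above imply the rough top-level shape of the serialized config. The sketch below is illustrative only and inferred from this test; placeholder values, and additional fields may exist depending on the Sentry version.

# Illustrative shape only, inferred from the pops/asserts in the test above.
serialized = {
    "projectId": 1,        # asserted against default_project.id
    "organizationId": 1,   # asserted against the organization id
    "publicKeys": [],      # one entry per ProjectKey; contents vary per run
    "lastChange": None,    # volatile, popped before snapshotting
    "lastFetch": None,     # volatile, popped before snapshotting
    "rev": None,           # volatile, popped before snapshotting
    "config": {},          # nested settings (filterSettings, breakdownsV2, ...) used by the other examples
}
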
Example #5
def test_get_project_config(default_project, insta_snapshot):
    # We could use the default_project fixture here, but we would like to avoid 1) hitting the db 2) creating a mock
    default_project.update_option("sentry:relay_pii_config", PII_CONFIG)
    default_project.organization.update_option("sentry:relay_pii_config",
                                               PII_CONFIG)
    cfg = get_project_config(default_project)

    insta_snapshot(cfg.config)
Example #6
def update_config_cache(generate, organization_id=None, project_id=None, update_reason=None):
    """
    Update the Redis cache for the Relay projectconfig. This task is invoked
    whenever a project/org option has been saved or smart quotas potentially
    caused a change in projectconfig.

    Either organization_id or project_id has to be provided.

    :param organization_id: The organization for which to invalidate configs.
    :param project_id: The project for which to invalidate configs.
    :param generate: If `True`, caches will be eagerly regenerated, not only
        invalidated.
    """

    from sentry.models import Project
    from sentry.relay import projectconfig_cache
    from sentry.relay.config import get_project_config

    # Delete key before generating configs such that we never have an outdated
    # but valid cache.
    #
    # If this was running at the end of the task, it would be more effective
    # against bursts of updates, but introduces a different race where an
    # outdated cache may be used.
    debounce_key = _get_schedule_debounce_key(project_id, organization_id)
    cache.delete(debounce_key)

    if project_id:
        projects = [Project.objects.get_from_cache(id=project_id)]
    elif organization_id:
        # XXX(markus): I feel like we should be able to cache this but I don't
        # want to add another method to src/sentry/db/models/manager.py
        projects = Project.objects.filter(organization_id=organization_id)

    if generate:
        project_keys = {}
        for key in ProjectKey.objects.filter(project_id__in=[project.id for project in projects]):
            project_keys.setdefault(key.project_id, []).append(key)

        project_configs = {}
        for project in projects:
            project_config = get_project_config(
                project, project_keys=project_keys.get(project.id, []), full_config=True
            )
            project_configs[project.id] = project_config.to_dict()

        projectconfig_cache.set_many(project_configs)
    else:
        projectconfig_cache.delete_many([project.id for project in projects])

    metrics.incr(
        "relay.projectconfig_cache.done",
        tags={"generate": generate, "update_reason": update_reason},
    )
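
A minimal usage sketch for the task above. The calls are shown synchronously for illustration (in production this is presumably scheduled as an asynchronous task); the id values and update_reason strings are assumptions, not values from the snippet.

# Assumed inputs (placeholders): a project id and an organization id.
project_id = 42
organization_id = 7

# Invalidate (but do not regenerate) the cached config for one project.
update_config_cache(generate=False, project_id=project_id,
                    update_reason="projectoption.changed")  # reason string is illustrative

# Eagerly regenerate cached configs for every project in an organization.
update_config_cache(generate=True, organization_id=organization_id,
                    update_reason="orgoption.changed")      # reason string is illustrative
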
Example #7
    def test_should_filter_message(self, mock_is_valid_error_message):
        TestItem = namedtuple('TestItem', 'value formatted result')

        items = [
            TestItem(
                {'type': 'UnfilteredException'},
                'UnfilteredException',
                True,
            ),
            TestItem(
                {'value': 'This is an unfiltered exception.'},
                'This is an unfiltered exception.',
                True,
            ),
            TestItem(
                {
                    'type': 'UnfilteredException',
                    'value': 'This is an unfiltered exception.'
                },
                'UnfilteredException: This is an unfiltered exception.',
                True,
            ),
            TestItem(
                {
                    'type': 'FilteredException',
                    'value': 'This is a filtered exception.'
                },
                'FilteredException: This is a filtered exception.',
                False,
            ),
        ]

        data = {
            'exception': {
                'values': [item.value for item in items]
            },
        }

        project_config = get_project_config(self.project.id, for_store=True)
        manager = EventManager(data,
                               project=self.project,
                               project_config=project_config)

        mock_is_valid_error_message.side_effect = [
            item.result for item in items
        ]

        assert manager.should_filter() == (True, FilterStatKeys.ERROR_MESSAGE)

        assert mock_is_valid_error_message.call_args_list == [
            mock.call(project_config, item.formatted) for item in items
        ]
Example #8
    def post(self, request):

        relay = request.relay
        assert relay is not None  # should be provided during Authentication

        full_config_requested = request.relay_request_data.get("fullConfig")

        if full_config_requested and not relay.is_internal:
            return Response("Relay unauthorized for full config information", 403)

        project_ids = set(request.relay_request_data.get("projects") or ())
        if project_ids:
            projects = {p.id: p for p in Project.objects.filter(pk__in=project_ids)}
        else:
            projects = {}

        # Preload all organizations and their options to prevent repeated
        # database access when computing the project configuration.
        org_ids = set(project.organization_id for project in six.itervalues(projects))
        if org_ids:
            orgs = {
                o.id: o
                for o in Organization.objects.filter(pk__in=org_ids)
                if request.relay.has_org_access(o)
            }
        else:
            orgs = {}
        org_options = {i: OrganizationOption.objects.get_all_values(i) for i in six.iterkeys(orgs)}

        configs = {}
        for project_id in project_ids:
            configs[six.text_type(project_id)] = None

            project = projects.get(int(project_id))
            if project is None:
                continue

            organization = orgs.get(project.organization_id)
            if organization is None:
                continue

            project.organization = organization
            org_opts = org_options.get(organization.id) or {}

            project_config = config.get_project_config(
                project, org_options=org_opts, full_config=full_config_requested
            )
            configs[six.text_type(project_id)] = project_config.to_camel_case_dict()

        return Response({"configs": configs}, status=200)
Example #9
    def post(self, request):

        relay = request.relay
        assert relay is not None  # should be provided during Authentication

        full_config_requested = request.relay_request_data.get('fullConfig')

        if full_config_requested and not relay.is_internal:
            return Response("Relay unauthorized for full config information",
                            403)

        project_ids = request.relay_request_data.get('projects') or ()
        projects = {}

        orgs = set()

        # In the first iteration we fetch all configs that we know about
        # but only the project settings
        if project_ids:
            for project in Project.objects.filter(pk__in=project_ids):
                # for internal relays return the full, rich configuration,
                # for external relays return the minimal config
                proj_config = config.get_project_config(
                    project.id, relay.is_internal and full_config_requested)

                projects[six.text_type(project.id)] = proj_config

                orgs.add(project.organization_id)

        # In the second iteration we check if the relay has access to
        # the project's org at all.
        if orgs:
            orgs = {o.id: o for o in Organization.objects.filter(pk__in=orgs)}
            for cfg in list(projects.values()):
                org = orgs.get(cfg.project.organization_id)
                if org is None or not request.relay.has_org_access(org):
                    projects.pop(six.text_type(cfg.project.id))

        # Fill in configs for projects that failed the access check or
        # don't exist.
        configs = {
            p_id: cfg.to_camel_case_dict()
            for p_id, cfg in six.iteritems(projects)
        }
        for project_id in project_ids:
            configs.setdefault(six.text_type(project_id), None)

        return Response({
            'configs': configs,
        }, status=200)
Example #10
def test_project_config_with_breakdown(default_project, insta_snapshot, transaction_metrics):
    with Feature(
        {
            "organizations:performance-ops-breakdown": True,
            "organizations:transaction-metrics-extraction": transaction_metrics == "with_metrics",
        }
    ):
        cfg = get_project_config(default_project, full_config=True)

    cfg = cfg.to_dict()
    insta_snapshot(
        {
            "breakdownsV2": cfg["config"]["breakdownsV2"],
            "transactionMetrics": cfg["config"].get("transactionMetrics"),
        }
    )
Example #11
def test_project_config_uses_filter_features(default_project, has_custom_filters):
    error_messages = ["some_error"]
    releases = ["1.2.3", "4.5.6"]
    default_project.update_option("sentry:error_messages", error_messages)
    default_project.update_option("sentry:releases", releases)

    with Feature({"projects:custom-inbound-filters": has_custom_filters}):
        cfg = get_project_config(default_project, full_config=True)

    cfg = cfg.to_dict()
    cfg_error_messages = get_path(cfg, "config", "filterSettings", "errorMessages")
    cfg_releases = get_path(cfg, "config", "filterSettings", "releases")

    if has_custom_filters:
        assert {"patterns": error_messages} == cfg_error_messages
        assert {"releases": releases} == cfg_releases
    else:
        assert cfg_releases is None
        assert cfg_error_messages is None
Example #12
def test_project_config_uses_filters_and_sampling_feature(
        default_project, dyn_sampling_data, has_dyn_sampling, full_config):
    """
    Tests that dynamic sampling information is retrieved for both "full config" and "restricted config",
    but only when the organization has the "organizations:filters-and-sampling" feature enabled.
    """
    default_project.update_option("sentry:dynamic_sampling",
                                  dyn_sampling_data())

    with Feature({"organizations:filters-and-sampling": has_dyn_sampling}):
        cfg = get_project_config(default_project, full_config=full_config)

    cfg = cfg.to_dict()
    dynamic_sampling = get_path(cfg, "config", "dynamicSampling")

    if has_dyn_sampling:
        assert dynamic_sampling == dyn_sampling_data()
    else:
        assert dynamic_sampling is None
Example #13
    def _post(self, request):
        relay = request.relay
        assert relay is not None  # should be provided during Authentication

        full_config_requested = request.relay_request_data.get("fullConfig")

        if full_config_requested and not relay.is_internal:
            return Response("Relay unauthorized for full config information",
                            403)

        with Hub.current.start_span(op="relay_fetch_projects"):
            project_ids = set(request.relay_request_data.get("projects") or ())
            if project_ids:
                with metrics.timer(
                        "relay_project_configs.fetching_projects.duration"):
                    projects = {
                        p.id: p
                        for p in Project.objects.get_many_from_cache(
                            project_ids)
                    }
            else:
                projects = {}

        with Hub.current.start_span(op="relay_fetch_orgs"):
            # Preload all organizations and their options to prevent repeated
            # database access when computing the project configuration.
            org_ids = set(project.organization_id
                          for project in six.itervalues(projects))
            if org_ids:
                with metrics.timer(
                        "relay_project_configs.fetching_orgs.duration"):
                    orgs = {
                        o.id: o
                        for o in Organization.objects.get_many_from_cache(
                            org_ids) if request.relay.has_org_access(o)
                    }
            else:
                orgs = {}
            org_options = {
                i: OrganizationOption.objects.get_all_values(i)
                for i in six.iterkeys(orgs)
            }

        with Hub.current.start_span(op="relay_fetch_keys"):
            project_keys = {}
            for key in ProjectKey.objects.get_many_from_cache(
                    project_ids, key="project_id"):
                project_keys.setdefault(key.project_id, []).append(key)

        metrics.timing("relay_project_configs.projects_requested",
                       len(project_ids))
        metrics.timing("relay_project_configs.projects_fetched", len(projects))
        metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

        configs = {}
        for project_id in project_ids:
            configs[six.text_type(project_id)] = None

            project = projects.get(int(project_id))
            if project is None:
                continue

            organization = orgs.get(project.organization_id)
            if organization is None:
                continue

            # Try to prevent organization from being fetched again in quotas.
            project.organization = organization
            project._organization_cache = organization

            org_opts = org_options.get(organization.id) or {}

            with Hub.current.start_span(op="get_config"):
                with metrics.timer(
                        "relay_project_configs.get_config.duration"):
                    project_config = config.get_project_config(
                        project,
                        org_options=org_opts,
                        full_config=full_config_requested,
                        project_keys=project_keys.get(project.id, []),
                    )

            configs[six.text_type(
                project_id)] = serialized_config = project_config.to_dict()
            config_size = len(json.dumps(serialized_config))
            metrics.timing("relay_project_configs.config_size", config_size)

            # Log if we see huge project configs
            if config_size >= PROJECT_CONFIG_SIZE_THRESHOLD:
                logger.info(
                    "relay.project_config.huge_config",
                    extra={
                        "project_id": project_id,
                        "size": config_size
                    },
                )

        return Response({"configs": configs}, status=200)
Example #14
    def _post(self, request):
        relay = request.relay
        assert relay is not None  # should be provided during Authentication

        full_config_requested = request.relay_request_data.get("fullConfig")

        if full_config_requested and not relay.is_internal:
            return Response("Relay unauthorized for full config information",
                            403)

        with Hub.current.start_span(op="relay_fetch_projects"):
            project_ids = set(request.relay_request_data.get("projects") or ())
            if project_ids:
                with metrics.timer(
                        "relay_project_configs.fetching_projects.duration"):
                    projects = {
                        p.id: p
                        for p in Project.objects.get_many_from_cache(
                            project_ids)
                    }
            else:
                projects = {}

        with Hub.current.start_span(op="relay_fetch_orgs"):
            # Preload all organizations and their options to prevent repeated
            # database access when computing the project configuration.
            org_ids = set(project.organization_id
                          for project in six.itervalues(projects))
            if org_ids:
                with metrics.timer(
                        "relay_project_configs.fetching_orgs.duration"):
                    orgs = Organization.objects.get_many_from_cache(org_ids)
                    orgs = {
                        o.id: o
                        for o in orgs if request.relay.has_org_access(o)
                    }
            else:
                orgs = {}
            org_options = {
                i: OrganizationOption.objects.get_all_values(i)
                for i in six.iterkeys(orgs)
            }

        with Hub.current.start_span(op="relay_fetch_keys"):
            project_keys = {}
            for key in ProjectKey.objects.filter(project_id__in=project_ids):
                project_keys.setdefault(key.project_id, []).append(key)

        metrics.timing("relay_project_configs.projects_requested",
                       len(project_ids))
        metrics.timing("relay_project_configs.projects_fetched", len(projects))
        metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

        configs = {}
        for project_id in project_ids:
            configs[six.text_type(project_id)] = {"disabled": True}

            project = projects.get(int(project_id))
            if project is None:
                continue

            organization = orgs.get(project.organization_id)
            if organization is None:
                continue

            # Try to prevent organization from being fetched again in quotas.
            project.organization = organization
            project._organization_cache = organization

            with Hub.current.start_span(op="get_config"):
                with metrics.timer(
                        "relay_project_configs.get_config.duration"):
                    project_config = config.get_project_config(
                        project,
                        org_options=org_options.get(organization.id) or {},
                        full_config=full_config_requested,
                        project_keys=project_keys.get(project.id) or [],
                    )

            configs[six.text_type(project_id)] = project_config.to_dict()

        if full_config_requested:
            projectconfig_cache.set_many(configs)

        return Response({"configs": configs}, status=200)
Example #15
    def _post_by_key(self, request, full_config_requested):
        public_keys = request.relay_request_data.get("publicKeys")
        public_keys = set(public_keys or ())

        project_keys = {}  # type: dict[str, ProjectKey]
        project_ids = set()  # type: set[int]

        with start_span(op="relay_fetch_keys"):
            with metrics.timer("relay_project_configs.fetching_keys.duration"):
                for key in ProjectKey.objects.get_many_from_cache(
                        public_keys, key="public_key"):
                    if key.status != ProjectKeyStatus.ACTIVE:
                        continue

                    project_keys[key.public_key] = key
                    project_ids.add(key.project_id)

        projects = {}  # type: dict[int, Project]
        organization_ids = set()  # type: set[int]

        with start_span(op="relay_fetch_projects"):
            with metrics.timer(
                    "relay_project_configs.fetching_projects.duration"):
                for project in Project.objects.get_many_from_cache(
                        project_ids):
                    projects[project.id] = project
                    organization_ids.add(project.organization_id)

        # Preload all organizations and their options to prevent repeated
        # database access when computing the project configuration.

        orgs = {}  # type: dict[int, Organization]

        with start_span(op="relay_fetch_orgs"):
            with metrics.timer("relay_project_configs.fetching_orgs.duration"):
                for org in Organization.objects.get_many_from_cache(
                        organization_ids):
                    if request.relay.has_org_access(org):
                        orgs[org.id] = org

        with start_span(op="relay_fetch_org_options"):
            with metrics.timer(
                    "relay_project_configs.fetching_org_options.duration"):
                for org_id in orgs:
                    OrganizationOption.objects.get_all_values(org_id)

        metrics.timing("relay_project_configs.projects_requested",
                       len(project_ids))
        metrics.timing("relay_project_configs.projects_fetched", len(projects))
        metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

        configs = {}
        for public_key in public_keys:
            configs[public_key] = {"disabled": True}

            key = project_keys.get(public_key)
            if key is None:
                continue

            project = projects.get(key.project_id)
            if project is None:
                continue

            organization = orgs.get(project.organization_id)
            if organization is None:
                continue

            # Try to prevent organization from being fetched again in quotas.
            project.organization = organization
            project._organization_cache = organization

            with Hub.current.start_span(op="get_config"):
                with metrics.timer(
                        "relay_project_configs.get_config.duration"):
                    project_config = config.get_project_config(
                        project,
                        full_config=full_config_requested,
                        project_keys=[key],
                    )

            configs[public_key] = project_config.to_dict()

        if full_config_requested:
            projectconfig_cache.set_many(configs)

        return Response({"configs": configs}, status=200)
Example #16
def update_config_cache(generate,
                        organization_id=None,
                        project_id=None,
                        public_key=None,
                        update_reason=None):
    """
    Update the Redis cache for the Relay projectconfig. This task is invoked
    whenever a project/org option has been saved or smart quotas potentially
    caused a change in projectconfig.

    Either organization_id, project_id, or public_key has to be provided.

    :param organization_id: The organization for which to invalidate configs.
    :param project_id: The project for which to invalidate configs.
    :param public_key: The public key for which to invalidate configs.
    :param generate: If `True`, caches will be eagerly regenerated, not only
        invalidated.
    """

    from sentry.models import Project, ProjectKey, ProjectKeyStatus
    from sentry.relay import projectconfig_cache
    from sentry.relay.config import get_project_config

    if project_id:
        set_current_event_project(project_id)

    if organization_id:
        # Cannot use bind_organization_context here because we do not have a
        # model and don't want to fetch one
        sentry_sdk.set_tag("organization_id", organization_id)

    if public_key:
        sentry_sdk.set_tag("public_key", public_key)

    sentry_sdk.set_tag("update_reason", update_reason)
    sentry_sdk.set_tag("generate", generate)

    # Delete key before generating configs such that we never have an outdated
    # but valid cache.
    #
    # If this was running at the end of the task, it would be more effective
    # against bursts of updates, but introduces a different race where an
    # outdated cache may be used.
    projectconfig_debounce_cache.mark_task_done(public_key, project_id,
                                                organization_id)

    if organization_id:
        projects = list(
            Project.objects.filter(organization_id=organization_id))
        keys = list(ProjectKey.objects.filter(project__in=projects))
    elif project_id:
        projects = [Project.objects.get(id=project_id)]
        keys = list(ProjectKey.objects.filter(project__in=projects))
    elif public_key:
        try:
            keys = [ProjectKey.objects.get(public_key=public_key)]
        except ProjectKey.DoesNotExist:
            # In this particular case, where a project key got deleted and
            # triggered an update, we at least know the public key that needs
            # to be deleted from cache.
            #
            # In other similar cases, like an org being deleted, we potentially
            # cannot find any keys anymore, so we don't know which cache keys
            # to delete.
            projectconfig_cache.delete_many([public_key])
            return

    else:
        assert False

    if generate:
        config_cache = {}
        for key in keys:
            if key.status != ProjectKeyStatus.ACTIVE:
                project_config = {"disabled": True}
            else:
                project_config = get_project_config(
                    key.project, project_keys=[key],
                    full_config=True).to_dict()
            config_cache[key.public_key] = project_config

        projectconfig_cache.set_many(config_cache)
    else:
        cache_keys_to_delete = []
        for key in keys:
            cache_keys_to_delete.append(key.public_key)

        projectconfig_cache.delete_many(cache_keys_to_delete)
Example #17
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    event_id = data["event_id"]

    project = Project.objects.get_from_cache(id=project_id)

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers.  These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in enhancers or ():
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning(
                "process.slow", extra={"project_id": project_id, "event_id": event_id}
            )

        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={"project_id": project_id, "event_id": event_id},
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # We assume that all potential PII is produced as part of stacktrace
    # processors and event enhancers.
    #
    # We assume that plugins for e.g. sessionstack (running via
    # `plugin.get_event_preprocessors`) are not producing data that should be
    # PII-stripped, ever.
    #
    # XXX(markus): Javascript event error translation is happening after this block
    # because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`, possibly move?
    if has_changed and features.has(
        "organizations:datascrubbers-v2", project.organization, actor=None
    ):
        with metrics.timer("tasks.store.datascrubbers.scrub"):
            project_config = get_project_config(project)

            new_data = safe_execute(scrub_data, project_config=project_config, event=data.data)

            # XXX(markus): When datascrubbing is finally "totally stable", we might want
            # to drop the event if it crashes to avoid saving PII
            if new_data is not None:
                data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in processors or ():
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
Example #18
def test_project_config_with_breakdown(default_project, insta_snapshot):
    with Feature("organizations:performance-ops-breakdown"):
        cfg = get_project_config(default_project, full_config=True)

    cfg = cfg.to_dict()
    insta_snapshot(cfg["config"]["breakdowns"])
Example #19
    def is_valid_ip(self, ip, inputs):
        self.project.update_option('sentry:blacklisted_ips', inputs)
        project_config = get_project_config(self.project.id)
        return is_valid_ip(project_config, ip)
Example #20
    def _post_by_project(self, request, full_config_requested):
        project_ids = set(request.relay_request_data.get("projects") or ())

        with start_span(op="relay_fetch_projects"):
            if project_ids:
                with metrics.timer(
                        "relay_project_configs.fetching_projects.duration"):
                    projects = {
                        p.id: p
                        for p in Project.objects.get_many_from_cache(
                            project_ids)
                    }
            else:
                projects = {}

        with start_span(op="relay_fetch_orgs"):
            # Preload all organizations and their options to prevent repeated
            # database access when computing the project configuration.
            org_ids = {
                project.organization_id
                for project in projects.values()
            }
            if org_ids:
                with metrics.timer(
                        "relay_project_configs.fetching_orgs.duration"):
                    orgs = Organization.objects.get_many_from_cache(org_ids)
                    orgs = {
                        o.id: o
                        for o in orgs if request.relay.has_org_access(o)
                    }
            else:
                orgs = {}

            with metrics.timer(
                    "relay_project_configs.fetching_org_options.duration"):
                for org_id in orgs.keys():
                    OrganizationOption.objects.get_all_values(org_id)

        with start_span(op="relay_fetch_keys"):
            project_keys = {}
            for key in ProjectKey.objects.filter(project_id__in=project_ids):
                project_keys.setdefault(key.project_id, []).append(key)

        metrics.timing("relay_project_configs.projects_requested",
                       len(project_ids))
        metrics.timing("relay_project_configs.projects_fetched", len(projects))
        metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

        configs = {}
        for project_id in project_ids:
            configs[str(project_id)] = {"disabled": True}

            project = projects.get(int(project_id))
            if project is None:
                continue

            organization = orgs.get(project.organization_id)
            if organization is None:
                continue

            # Try to prevent organization from being fetched again in quotas.
            project.organization = organization
            project._organization_cache = organization

            with start_span(op="get_config"):
                with metrics.timer(
                        "relay_project_configs.get_config.duration"):
                    project_config = config.get_project_config(
                        project,
                        full_config=full_config_requested,
                        project_keys=project_keys.get(project.id) or [],
                    )

            configs[str(project_id)] = project_config.to_dict()

        if full_config_requested:
            projectconfig_cache.set_many(configs)

        return Response({"configs": configs}, status=200)
Example #21
    def is_valid_release(self, value, inputs):
        self.project.update_option(u'sentry:{}'.format(FilterTypes.RELEASES),
                                   inputs)
        project_config = get_project_config(self.project.id)
        return is_valid_release(project_config, value)
Example #22
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    with sentry_sdk.start_span(op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer("tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id
        )

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer(
            "tasks.store.process_event.stacktraces", tags={"from_symbolicate": from_symbolicate}
        ):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if (
        has_changed
        and options.get("processing.can-use-scrubbers")
        and features.has("organizations:datascrubbers-v2", project.organization, actor=None)
    ):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer(
                "tasks.store.datascrubbers.scrub", tags={"from_symbolicate": from_symbolicate}
            ):
                project_config = get_project_config(project)

                new_data = safe_execute(scrub_data, project_config=project_config, event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                "tasks.store.process_event.preprocessors",
                tags={"plugin": plugin.slug, "from_symbolicate": from_symbolicate},
            ):
                processors = safe_execute(
                    plugin.get_event_preprocessors, data=data, _with_transaction=False
                )
                for processor in processors or ():
                    result = safe_execute(processor, data)
                    if result:
                        data = result
                        has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
Example #23
    def is_valid_error_message(self, value, inputs):
        self.project.update_option(
            u'sentry:{}'.format(FilterTypes.ERROR_MESSAGES), inputs)
        project_config = get_project_config(self.project.id)
        return is_valid_error_message(project_config, value)
Example #24
    def dispatch(self, request, project_id=None, *args, **kwargs):
        helper = None
        try:
            helper = ClientApiHelper(
                agent=request.META.get("HTTP_USER_AGENT"),
                project_id=project_id,
                ip_address=request.META["REMOTE_ADDR"],
            )

            # if the project id is not directly specified, get it from the authentication information
            project_id = _get_project_id_from_request(
                project_id, request, self.auth_helper_cls, helper
            )

            project_config = get_project_config(project_id, for_store=True)

            helper.context.bind_project(project_config.project)

            if kafka_publisher is not None:
                self._publish_to_kafka(request, project_config)

            origin = self.auth_helper_cls.origin_from_request(request)

            response = self._dispatch(
                request, helper, project_config, origin=origin, *args, **kwargs
            )
        except APIError as e:
            context = {"error": force_bytes(e.msg, errors="replace")}
            if e.name:
                context["error_name"] = e.name

            response = HttpResponse(
                json.dumps(context), content_type="application/json", status=e.http_status
            )
            # Set X-Sentry-Error as in many cases it is easier to inspect the headers
            response["X-Sentry-Error"] = context["error"]

            if isinstance(e, APIRateLimited) and e.retry_after is not None:
                response["Retry-After"] = six.text_type(int(math.ceil(e.retry_after)))

        except Exception as e:
            # TODO(dcramer): test failures are not outputting the log message
            # here
            if settings.DEBUG:
                content = traceback.format_exc()
            else:
                content = ""
            logger.exception(e)
            response = HttpResponse(content, content_type="text/plain", status=500)

        # TODO(dcramer): it'd be nice if we had an incr_multi method so
        # tsdb could optimize this
        metrics.incr("client-api.all-versions.requests", skip_internal=False)
        metrics.incr(
            "client-api.all-versions.responses.%s" % (response.status_code,), skip_internal=False
        )
        metrics.incr(
            "client-api.all-versions.responses.%sxx" % (six.text_type(response.status_code)[0],),
            skip_internal=False,
        )

        if helper is not None and helper.context is not None and helper.context.version:
            metrics.incr("client-api.v%s.requests" % (helper.context.version,), skip_internal=False)
            metrics.incr(
                "client-api.v%s.responses.%s" % (helper.context.version, response.status_code),
                skip_internal=False,
            )
            metrics.incr(
                "client-api.v%s.responses.%sxx"
                % (helper.context.version, six.text_type(response.status_code)[0]),
                skip_internal=False,
            )

        return response
Example #25
def update_config_cache(generate,
                        organization_id=None,
                        project_id=None,
                        update_reason=None):
    """
    Update the Redis cache for the Relay projectconfig. This task is invoked
    whenever a project/org option has been saved or smart quotas potentially
    caused a change in projectconfig.

    Either organization_id or project_id has to be provided.

    :param organization_id: The organization for which to invalidate configs.
    :param project_id: The project for which to invalidate configs.
    :param generate: If `True`, caches will be eagerly regenerated, not only
        invalidated.
    """

    from sentry.models import Project, ProjectKey, ProjectKeyStatus
    from sentry.relay import projectconfig_cache
    from sentry.relay.config import get_project_config

    if project_id:
        set_current_event_project(project_id)

    if organization_id:
        # Cannot use bind_organization_context here because we do not have a
        # model and don't want to fetch one
        sentry_sdk.set_tag("organization_id", organization_id)

    sentry_sdk.set_tag("update_reason", update_reason)
    sentry_sdk.set_tag("generate", generate)

    # Delete key before generating configs such that we never have an outdated
    # but valid cache.
    #
    # If this was running at the end of the task, it would be more effective
    # against bursts of updates, but introduces a different race where an
    # outdated cache may be used.
    projectconfig_debounce_cache.mark_task_done(project_id, organization_id)

    if project_id:
        projects = [Project.objects.get_from_cache(id=project_id)]
    elif organization_id:
        # XXX(markus): I feel like we should be able to cache this but I don't
        # want to add another method to src/sentry/db/models/manager.py
        projects = Project.objects.filter(organization_id=organization_id)

    project_keys = {}
    for key in ProjectKey.objects.filter(
            project_id__in=[project.id for project in projects]):
        project_keys.setdefault(key.project_id, []).append(key)

    if generate:
        config_cache = {}
        for project in projects:
            project_config = get_project_config(project,
                                                project_keys=project_keys.get(
                                                    project.id, []),
                                                full_config=True)
            config_cache[project.id] = project_config.to_dict()

            for key in project_keys.get(project.id) or ():
                # XXX(markus): This is currently the cleanest way to get only
                # state for a single projectkey (considering quotas and
                # everything)
                if key.status != ProjectKeyStatus.ACTIVE:
                    continue

                project_config = get_project_config(project,
                                                    project_keys=[key],
                                                    full_config=True)
                config_cache[key.public_key] = project_config.to_dict()

        projectconfig_cache.set_many(config_cache)
    else:
        cache_keys_to_delete = []
        for project in projects:
            cache_keys_to_delete.append(project.id)
            for key in project_keys.get(project.id) or ():
                cache_keys_to_delete.append(key.public_key)

        projectconfig_cache.delete_many(cache_keys_to_delete)

    metrics.incr(
        "relay.projectconfig_cache.done",
        tags={
            "generate": generate,
            "update_reason": update_reason
        },
    )
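
For reference, the generate branch above writes the same serialized config under two kinds of cache keys, so lookups by project id and by public key both hit the cache. The sketch below is illustrative only; the id and key values are placeholders.

# Illustrative cache contents produced by the generate branch above.
config_cache = {
    123: {},                 # keyed by project.id
    "abc123publickey": {},   # keyed by ProjectKey.public_key (active keys only)
}
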