def test_project_config_with_span_attributes(default_project, insta_snapshot):
    # The span attributes config is not set with the flag turned off
    cfg = get_project_config(default_project, full_config=True)
    cfg = cfg.to_dict()
    assert "spanAttributes" not in cfg["config"]

    with Feature("projects:performance-suspect-spans-ingestion"):
        cfg = get_project_config(default_project, full_config=True)
        cfg = cfg.to_dict()
        insta_snapshot(cfg["config"]["spanAttributes"])
def test_should_filter_message(self, mock_is_valid_error_message):
    TestItem = namedtuple("TestItem", "value formatted result")

    items = [
        TestItem({"type": "UnfilteredException"}, "UnfilteredException", True),
        TestItem(
            {"value": "This is an unfiltered exception."},
            "This is an unfiltered exception.",
            True,
        ),
        TestItem(
            {"type": "UnfilteredException", "value": "This is an unfiltered exception."},
            "UnfilteredException: This is an unfiltered exception.",
            True,
        ),
        TestItem(
            {"type": "FilteredException", "value": "This is a filtered exception."},
            "FilteredException: This is a filtered exception.",
            False,
        ),
    ]

    data = {"exception": {"values": [item.value for item in items]}}

    project_config = get_project_config(self.project)
    manager = EventManager(data, project=self.project, project_config=project_config)

    mock_is_valid_error_message.side_effect = [item.result for item in items]

    assert manager.should_filter() == (True, FilterStatKeys.ERROR_MESSAGE)

    assert mock_is_valid_error_message.call_args_list == [
        mock.call(project_config, item.formatted) for item in items
    ]
def test_project_config_satisfaction_thresholds(
    default_project,
    insta_snapshot,
    has_project_transaction_threshold_overrides,
    has_project_transaction_threshold,
):
    if has_project_transaction_threshold:
        default_project.projecttransactionthreshold_set.create(
            organization=default_project.organization,
            threshold=500,
            metric=TransactionMetric.LCP.value,
        )
    if has_project_transaction_threshold_overrides:
        default_project.projecttransactionthresholdoverride_set.create(
            organization=default_project.organization,
            transaction="foo",
            threshold=400,
            metric=TransactionMetric.DURATION.value,
        )
        default_project.projecttransactionthresholdoverride_set.create(
            organization=default_project.organization,
            transaction="bar",
            threshold=600,
            metric=TransactionMetric.LCP.value,
        )

    with Feature(
        {
            "organizations:transaction-metrics-extraction": True,
        }
    ):
        cfg = get_project_config(default_project, full_config=True)

    cfg = cfg.to_dict()
    insta_snapshot(cfg["config"]["transactionMetrics"]["satisfactionThresholds"])
def test_get_project_config(default_project, insta_snapshot, full, has_ops_breakdown):
    # We could use the default_project fixture here, but we would like to
    # avoid 1) hitting the db 2) creating a mock
    default_project.update_option("sentry:relay_pii_config", PII_CONFIG)
    default_project.organization.update_option("sentry:relay_pii_config", PII_CONFIG)
    keys = ProjectKey.objects.filter(project=default_project)

    with Feature({"organizations:performance-ops-breakdown": has_ops_breakdown}):
        cfg = get_project_config(default_project, full_config=full, project_keys=keys)

    cfg = cfg.to_dict()

    # Remove keys that change every time
    cfg.pop("lastChange")
    cfg.pop("lastFetch")
    cfg.pop("rev")

    # public keys change every time
    assert cfg.pop("projectId") == default_project.id
    assert len(cfg.pop("publicKeys")) == len(keys)
    assert cfg.pop("organizationId") == default_project.organization.id

    insta_snapshot(cfg)
def test_get_project_config(default_project, insta_snapshot):
    # We could use the default_project fixture here, but we would like to
    # avoid 1) hitting the db 2) creating a mock
    default_project.update_option("sentry:relay_pii_config", PII_CONFIG)
    default_project.organization.update_option("sentry:relay_pii_config", PII_CONFIG)

    cfg = get_project_config(default_project)
    insta_snapshot(cfg.config)
def update_config_cache(generate, organization_id=None, project_id=None, update_reason=None):
    """
    Update the Redis cache for the Relay projectconfig. This task is invoked
    whenever a project/org option has been saved or smart quotas potentially
    caused a change in projectconfig.

    Either organization_id or project_id has to be provided.

    :param organization_id: The organization for which to invalidate configs.
    :param project_id: The project for which to invalidate configs.
    :param generate: If `True`, caches will be eagerly regenerated, not only
        invalidated.
    """
    from sentry.models import Project
    from sentry.relay import projectconfig_cache
    from sentry.relay.config import get_project_config

    # Delete key before generating configs such that we never have an outdated
    # but valid cache.
    #
    # If this was running at the end of the task, it would be more effective
    # against bursts of updates, but introduces a different race where an
    # outdated cache may be used.
    debounce_key = _get_schedule_debounce_key(project_id, organization_id)
    cache.delete(debounce_key)

    if project_id:
        projects = [Project.objects.get_from_cache(id=project_id)]
    elif organization_id:
        # XXX(markus): I feel like we should be able to cache this but I don't
        # want to add another method to src/sentry/db/models/manager.py
        projects = Project.objects.filter(organization_id=organization_id)

    if generate:
        project_keys = {}
        for key in ProjectKey.objects.filter(project_id__in=[project.id for project in projects]):
            project_keys.setdefault(key.project_id, []).append(key)

        project_configs = {}
        for project in projects:
            project_config = get_project_config(
                project, project_keys=project_keys.get(project.id, []), full_config=True
            )
            project_configs[project.id] = project_config.to_dict()

        projectconfig_cache.set_many(project_configs)
    else:
        projectconfig_cache.delete_many([project.id for project in projects])

    metrics.incr(
        "relay.projectconfig_cache.done",
        tags={"generate": generate, "update_reason": update_reason},
    )
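A minimal sketch of how this invalidation task might be scheduled from an option-save hook, assuming it is registered as a Celery task so that `.delay()` is available; the surrounding hook function and the reason string are hypothetical, not taken from the source.

# Hypothetical caller: eagerly regenerate the Relay config cache after a
# project option changes. Assumes update_config_cache is wired up as a
# Celery task; adapt to however the task is actually registered.
def on_project_option_saved(project):
    update_config_cache.delay(
        generate=True,                           # rebuild instead of only invalidating
        project_id=project.id,
        update_reason="project.option_saved",    # illustrative reason string
    )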
def test_should_filter_message(self, mock_is_valid_error_message):
    TestItem = namedtuple('TestItem', 'value formatted result')

    items = [
        TestItem(
            {'type': 'UnfilteredException'},
            'UnfilteredException',
            True,
        ),
        TestItem(
            {'value': 'This is an unfiltered exception.'},
            'This is an unfiltered exception.',
            True,
        ),
        TestItem(
            {
                'type': 'UnfilteredException',
                'value': 'This is an unfiltered exception.',
            },
            'UnfilteredException: This is an unfiltered exception.',
            True,
        ),
        TestItem(
            {
                'type': 'FilteredException',
                'value': 'This is a filtered exception.',
            },
            'FilteredException: This is a filtered exception.',
            False,
        ),
    ]

    data = {
        'exception': {
            'values': [item.value for item in items]
        },
    }

    project_config = get_project_config(self.project.id, for_store=True)
    manager = EventManager(data, project=self.project, project_config=project_config)

    mock_is_valid_error_message.side_effect = [item.result for item in items]

    assert manager.should_filter() == (True, FilterStatKeys.ERROR_MESSAGE)

    assert mock_is_valid_error_message.call_args_list == [
        mock.call(project_config, item.formatted) for item in items
    ]
def post(self, request):
    relay = request.relay
    assert relay is not None  # should be provided during Authentication

    full_config_requested = request.relay_request_data.get("fullConfig")

    if full_config_requested and not relay.is_internal:
        return Response("Relay unauthorized for full config information", 403)

    project_ids = set(request.relay_request_data.get("projects") or ())
    if project_ids:
        projects = {p.id: p for p in Project.objects.filter(pk__in=project_ids)}
    else:
        projects = {}

    # Preload all organizations and their options to prevent repeated
    # database access when computing the project configuration.
    org_ids = set(project.organization_id for project in six.itervalues(projects))
    if org_ids:
        orgs = {
            o.id: o
            for o in Organization.objects.filter(pk__in=org_ids)
            if request.relay.has_org_access(o)
        }
    else:
        orgs = {}

    org_options = {i: OrganizationOption.objects.get_all_values(i) for i in six.iterkeys(orgs)}

    configs = {}
    for project_id in project_ids:
        configs[six.text_type(project_id)] = None

        project = projects.get(int(project_id))
        if project is None:
            continue

        organization = orgs.get(project.organization_id)
        if organization is None:
            continue

        project.organization = organization
        org_opts = org_options.get(organization.id) or {}

        project_config = config.get_project_config(
            project, org_options=org_opts, full_config=full_config_requested
        )

        configs[six.text_type(project_id)] = project_config.to_camel_case_dict()

    return Response({"configs": configs}, status=200)
def post(self, request):
    relay = request.relay
    assert relay is not None  # should be provided during Authentication

    full_config_requested = request.relay_request_data.get('fullConfig')

    if full_config_requested and not relay.is_internal:
        return Response("Relay unauthorized for full config information", 403)

    project_ids = request.relay_request_data.get('projects') or ()
    projects = {}
    orgs = set()

    # In the first iteration we fetch all configs that we know about
    # but only the project settings
    if project_ids:
        for project in Project.objects.filter(pk__in=project_ids):
            # for internal relays return the full, rich, configuration,
            # for external relays return the minimal config
            proj_config = config.get_project_config(
                project.id, relay.is_internal and full_config_requested)
            projects[six.text_type(project.id)] = proj_config
            orgs.add(project.organization_id)

    # In the second iteration we check if the relay has access to
    # the project's org at all.
    if orgs:
        orgs = {o.id: o for o in Organization.objects.filter(pk__in=orgs)}
        for cfg in list(projects.values()):
            org = orgs.get(cfg.project.organization_id)
            if org is None or not request.relay.has_org_access(org):
                projects.pop(six.text_type(cfg.project.id))

    # Fill in configs that failed the access check or don't exist.
    configs = {
        p_id: cfg.to_camel_case_dict()
        for p_id, cfg in six.iteritems(projects)
    }
    for project_id in project_ids:
        configs.setdefault(six.text_type(project_id), None)

    return Response({
        'configs': configs,
    }, status=200)
def test_project_config_with_breakdown(default_project, insta_snapshot, transaction_metrics):
    with Feature(
        {
            "organizations:performance-ops-breakdown": True,
            "organizations:transaction-metrics-extraction": transaction_metrics == "with_metrics",
        }
    ):
        cfg = get_project_config(default_project, full_config=True)

    cfg = cfg.to_dict()
    insta_snapshot(
        {
            "breakdownsV2": cfg["config"]["breakdownsV2"],
            "transactionMetrics": cfg["config"].get("transactionMetrics"),
        }
    )
def test_project_config_uses_filter_features(default_project, has_custom_filters):
    error_messages = ["some_error"]
    releases = ["1.2.3", "4.5.6"]
    default_project.update_option("sentry:error_messages", error_messages)
    default_project.update_option("sentry:releases", releases)

    with Feature({"projects:custom-inbound-filters": has_custom_filters}):
        cfg = get_project_config(default_project, full_config=True)

    cfg = cfg.to_dict()
    cfg_error_messages = get_path(cfg, "config", "filterSettings", "errorMessages")
    cfg_releases = get_path(cfg, "config", "filterSettings", "releases")

    if has_custom_filters:
        assert {"patterns": error_messages} == cfg_error_messages
        assert {"releases": releases} == cfg_releases
    else:
        assert cfg_releases is None
        assert cfg_error_messages is None
def test_project_config_uses_filters_and_sampling_feature(
    default_project, dyn_sampling_data, has_dyn_sampling, full_config
):
    """
    Tests that dynamic sampling information is retrieved for both "full config"
    and "restricted config", but only when the organization has the
    "organizations:filters-and-sampling" feature enabled.
    """
    default_project.update_option("sentry:dynamic_sampling", dyn_sampling_data())

    with Feature({"organizations:filters-and-sampling": has_dyn_sampling}):
        cfg = get_project_config(default_project, full_config=full_config)

    cfg = cfg.to_dict()
    dynamic_sampling = get_path(cfg, "config", "dynamicSampling")

    if has_dyn_sampling:
        assert dynamic_sampling == dyn_sampling_data()
    else:
        assert dynamic_sampling is None
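The boolean arguments `has_dyn_sampling` and `full_config` are presumably driven by test parametrization; a sketch of how the decorator stack for the test above could look under a standard pytest setup (the decorators are an assumption, not taken from the source module).

import pytest

# Hypothetical parametrization: exercises all four combinations of the two
# flags. The real test module may supply these fixtures differently.
@pytest.mark.django_db
@pytest.mark.parametrize("has_dyn_sampling", [True, False])
@pytest.mark.parametrize("full_config", [True, False])
def test_project_config_uses_filters_and_sampling_feature(
    default_project, dyn_sampling_data, has_dyn_sampling, full_config
):
    ...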
def _post(self, request):
    relay = request.relay
    assert relay is not None  # should be provided during Authentication

    full_config_requested = request.relay_request_data.get("fullConfig")

    if full_config_requested and not relay.is_internal:
        return Response("Relay unauthorized for full config information", 403)

    with Hub.current.start_span(op="relay_fetch_projects"):
        project_ids = set(request.relay_request_data.get("projects") or ())
        if project_ids:
            with metrics.timer("relay_project_configs.fetching_projects.duration"):
                projects = {p.id: p for p in Project.objects.get_many_from_cache(project_ids)}
        else:
            projects = {}

    with Hub.current.start_span(op="relay_fetch_orgs"):
        # Preload all organizations and their options to prevent repeated
        # database access when computing the project configuration.
        org_ids = set(project.organization_id for project in six.itervalues(projects))
        if org_ids:
            with metrics.timer("relay_project_configs.fetching_orgs.duration"):
                orgs = {
                    o.id: o
                    for o in Organization.objects.get_many_from_cache(org_ids)
                    if request.relay.has_org_access(o)
                }
        else:
            orgs = {}

        org_options = {i: OrganizationOption.objects.get_all_values(i) for i in six.iterkeys(orgs)}

    with Hub.current.start_span(op="relay_fetch_keys"):
        project_keys = {}
        for key in ProjectKey.objects.get_many_from_cache(project_ids, key="project_id"):
            project_keys.setdefault(key.project_id, []).append(key)

    metrics.timing("relay_project_configs.projects_requested", len(project_ids))
    metrics.timing("relay_project_configs.projects_fetched", len(projects))
    metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

    configs = {}
    for project_id in project_ids:
        configs[six.text_type(project_id)] = None

        project = projects.get(int(project_id))
        if project is None:
            continue

        organization = orgs.get(project.organization_id)
        if organization is None:
            continue

        # Try to prevent organization from being fetched again in quotas.
        project.organization = organization
        project._organization_cache = organization

        org_opts = org_options.get(organization.id) or {}

        with Hub.current.start_span(op="get_config"):
            with metrics.timer("relay_project_configs.get_config.duration"):
                project_config = config.get_project_config(
                    project,
                    org_options=org_opts,
                    full_config=full_config_requested,
                    project_keys=project_keys.get(project.id, []),
                )

        configs[six.text_type(project_id)] = serialized_config = project_config.to_dict()

        config_size = len(json.dumps(serialized_config))
        metrics.timing("relay_project_configs.config_size", config_size)

        # Log if we see huge project configs
        if config_size >= PROJECT_CONFIG_SIZE_THRESHOLD:
            logger.info(
                "relay.project_config.huge_config",
                extra={"project_id": project_id, "size": config_size},
            )

    return Response({"configs": configs}, status=200)
def _post(self, request):
    relay = request.relay
    assert relay is not None  # should be provided during Authentication

    full_config_requested = request.relay_request_data.get("fullConfig")

    if full_config_requested and not relay.is_internal:
        return Response("Relay unauthorized for full config information", 403)

    with Hub.current.start_span(op="relay_fetch_projects"):
        project_ids = set(request.relay_request_data.get("projects") or ())
        if project_ids:
            with metrics.timer("relay_project_configs.fetching_projects.duration"):
                projects = {p.id: p for p in Project.objects.get_many_from_cache(project_ids)}
        else:
            projects = {}

    with Hub.current.start_span(op="relay_fetch_orgs"):
        # Preload all organizations and their options to prevent repeated
        # database access when computing the project configuration.
        org_ids = set(project.organization_id for project in six.itervalues(projects))
        if org_ids:
            with metrics.timer("relay_project_configs.fetching_orgs.duration"):
                orgs = Organization.objects.get_many_from_cache(org_ids)
                orgs = {o.id: o for o in orgs if request.relay.has_org_access(o)}
        else:
            orgs = {}

        org_options = {i: OrganizationOption.objects.get_all_values(i) for i in six.iterkeys(orgs)}

    with Hub.current.start_span(op="relay_fetch_keys"):
        project_keys = {}
        for key in ProjectKey.objects.filter(project_id__in=project_ids):
            project_keys.setdefault(key.project_id, []).append(key)

    metrics.timing("relay_project_configs.projects_requested", len(project_ids))
    metrics.timing("relay_project_configs.projects_fetched", len(projects))
    metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

    configs = {}
    for project_id in project_ids:
        configs[six.text_type(project_id)] = {"disabled": True}

        project = projects.get(int(project_id))
        if project is None:
            continue

        organization = orgs.get(project.organization_id)
        if organization is None:
            continue

        # Try to prevent organization from being fetched again in quotas.
        project.organization = organization
        project._organization_cache = organization

        with Hub.current.start_span(op="get_config"):
            with metrics.timer("relay_project_configs.get_config.duration"):
                project_config = config.get_project_config(
                    project,
                    org_options=org_options.get(organization.id) or {},
                    full_config=full_config_requested,
                    project_keys=project_keys.get(project.id) or [],
                )

        configs[six.text_type(project_id)] = project_config.to_dict()

    if full_config_requested:
        projectconfig_cache.set_many(configs)

    return Response({"configs": configs}, status=200)
def _post_by_key(self, request, full_config_requested):
    public_keys = request.relay_request_data.get("publicKeys")
    public_keys = set(public_keys or ())

    project_keys = {}  # type: dict[str, ProjectKey]
    project_ids = set()  # type: set[int]

    with start_span(op="relay_fetch_keys"):
        with metrics.timer("relay_project_configs.fetching_keys.duration"):
            for key in ProjectKey.objects.get_many_from_cache(public_keys, key="public_key"):
                if key.status != ProjectKeyStatus.ACTIVE:
                    continue

                project_keys[key.public_key] = key
                project_ids.add(key.project_id)

    projects = {}  # type: dict[int, Project]
    organization_ids = set()  # type: set[int]

    with start_span(op="relay_fetch_projects"):
        with metrics.timer("relay_project_configs.fetching_projects.duration"):
            for project in Project.objects.get_many_from_cache(project_ids):
                projects[project.id] = project
                organization_ids.add(project.organization_id)

    # Preload all organizations and their options to prevent repeated
    # database access when computing the project configuration.
    orgs = {}  # type: dict[int, Organization]

    with start_span(op="relay_fetch_orgs"):
        with metrics.timer("relay_project_configs.fetching_orgs.duration"):
            for org in Organization.objects.get_many_from_cache(organization_ids):
                if request.relay.has_org_access(org):
                    orgs[org.id] = org

    with start_span(op="relay_fetch_org_options"):
        with metrics.timer("relay_project_configs.fetching_org_options.duration"):
            for org_id in orgs:
                OrganizationOption.objects.get_all_values(org_id)

    metrics.timing("relay_project_configs.projects_requested", len(project_ids))
    metrics.timing("relay_project_configs.projects_fetched", len(projects))
    metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

    configs = {}
    for public_key in public_keys:
        configs[public_key] = {"disabled": True}

        key = project_keys.get(public_key)
        if key is None:
            continue

        project = projects.get(key.project_id)
        if project is None:
            continue

        organization = orgs.get(project.organization_id)
        if organization is None:
            continue

        # Try to prevent organization from being fetched again in quotas.
        project.organization = organization
        project._organization_cache = organization

        with Hub.current.start_span(op="get_config"):
            with metrics.timer("relay_project_configs.get_config.duration"):
                project_config = config.get_project_config(
                    project,
                    full_config=full_config_requested,
                    project_keys=[key],
                )

        configs[public_key] = project_config.to_dict()

    if full_config_requested:
        projectconfig_cache.set_many(configs)

    return Response({"configs": configs}, status=200)
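For orientation, the payload returned by this key-based variant is keyed by the requested public keys, with unknown, inactive, or inaccessible keys left at the `{"disabled": True}` placeholder; the sketch below assumes that shape, and the inner config fields are illustrative only.

# Illustrative response shape for _post_by_key; field names inside the
# per-key entry are examples, not an exhaustive schema.
example_response = {
    "configs": {
        "11111111111111111111111111111111": {"disabled": True},   # key not found / inactive
        "22222222222222222222222222222222": {
            "projectId": 42,                                       # example value
            "config": {"...": "..."},                              # full or restricted config
        },
    }
}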
def update_config_cache(generate, organization_id=None, project_id=None, public_key=None, update_reason=None):
    """
    Update the Redis cache for the Relay projectconfig. This task is invoked
    whenever a project/org option has been saved or smart quotas potentially
    caused a change in projectconfig.

    Either organization_id, project_id or public_key has to be provided.

    :param organization_id: The organization for which to invalidate configs.
    :param project_id: The project for which to invalidate configs.
    :param public_key: The project key for which to invalidate configs.
    :param generate: If `True`, caches will be eagerly regenerated, not only
        invalidated.
    """
    from sentry.models import Project, ProjectKey, ProjectKeyStatus
    from sentry.relay import projectconfig_cache
    from sentry.relay.config import get_project_config

    if project_id:
        set_current_event_project(project_id)

    if organization_id:
        # Cannot use bind_organization_context here because we do not have a
        # model and don't want to fetch one
        sentry_sdk.set_tag("organization_id", organization_id)

    if public_key:
        sentry_sdk.set_tag("public_key", public_key)

    sentry_sdk.set_tag("update_reason", update_reason)
    sentry_sdk.set_tag("generate", generate)

    # Delete key before generating configs such that we never have an outdated
    # but valid cache.
    #
    # If this was running at the end of the task, it would be more effective
    # against bursts of updates, but introduces a different race where an
    # outdated cache may be used.
    projectconfig_debounce_cache.mark_task_done(public_key, project_id, organization_id)

    if organization_id:
        projects = list(Project.objects.filter(organization_id=organization_id))
        keys = list(ProjectKey.objects.filter(project__in=projects))
    elif project_id:
        projects = [Project.objects.get(id=project_id)]
        keys = list(ProjectKey.objects.filter(project__in=projects))
    elif public_key:
        try:
            keys = [ProjectKey.objects.get(public_key=public_key)]
        except ProjectKey.DoesNotExist:
            # In this particular case, where a project key got deleted and
            # triggered an update, we at least know the public key that needs
            # to be deleted from cache.
            #
            # In other similar cases, like an org being deleted, we potentially
            # cannot find any keys anymore, so we don't know which cache keys
            # to delete.
            projectconfig_cache.delete_many([public_key])
            return
    else:
        assert False

    if generate:
        config_cache = {}
        for key in keys:
            if key.status != ProjectKeyStatus.ACTIVE:
                project_config = {"disabled": True}
            else:
                project_config = get_project_config(
                    key.project, project_keys=[key], full_config=True
                ).to_dict()
            config_cache[key.public_key] = project_config

        projectconfig_cache.set_many(config_cache)
    else:
        cache_keys_to_delete = []
        for key in keys:
            cache_keys_to_delete.append(key.public_key)

        projectconfig_cache.delete_many(cache_keys_to_delete)
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data["project"]
    event_id = data["event_id"]

    project = Project.objects.get_from_cache(id=project_id)

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers. These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in enhancers or ():
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning(
                "process.slow", extra={"project_id": project_id, "event_id": event_id}
            )

        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "process.failed.infinite_retry",
                extra={"project_id": project_id, "event_id": event_id},
            )
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # We assume that all potential PII is produced as part of stacktrace
    # processors and event enhancers.
    #
    # We assume that plugins for eg sessionstack (running via
    # `plugin.get_event_preprocessors`) are not producing data that should be
    # PII-stripped, ever.
    #
    # XXX(markus): Javascript event error translation is happening after this block
    # because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`, possibly move?
    if has_changed and features.has(
        "organizations:datascrubbers-v2", project.organization, actor=None
    ):
        with metrics.timer("tasks.store.datascrubbers.scrub"):
            project_config = get_project_config(project)

            new_data = safe_execute(scrub_data, project_config=project_config, event=data.data)

            # XXX(markus): When datascrubbing is finally "totally stable", we might want
            # to drop the event if it crashes to avoid saving PII
            if new_data is not None:
                data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in processors or ():
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
def test_project_config_with_breakdown(default_project, insta_snapshot):
    with Feature("organizations:performance-ops-breakdown"):
        cfg = get_project_config(default_project, full_config=True)

    cfg = cfg.to_dict()
    insta_snapshot(cfg["config"]["breakdowns"])
def is_valid_ip(self, ip, inputs):
    self.project.update_option('sentry:blacklisted_ips', inputs)
    project_config = get_project_config(self.project.id)
    return is_valid_ip(project_config, ip)
def _post_by_project(self, request, full_config_requested):
    project_ids = set(request.relay_request_data.get("projects") or ())

    with start_span(op="relay_fetch_projects"):
        if project_ids:
            with metrics.timer("relay_project_configs.fetching_projects.duration"):
                projects = {p.id: p for p in Project.objects.get_many_from_cache(project_ids)}
        else:
            projects = {}

    with start_span(op="relay_fetch_orgs"):
        # Preload all organizations and their options to prevent repeated
        # database access when computing the project configuration.
        org_ids = {project.organization_id for project in projects.values()}
        if org_ids:
            with metrics.timer("relay_project_configs.fetching_orgs.duration"):
                orgs = Organization.objects.get_many_from_cache(org_ids)
                orgs = {o.id: o for o in orgs if request.relay.has_org_access(o)}
        else:
            orgs = {}

        with metrics.timer("relay_project_configs.fetching_org_options.duration"):
            for org_id in orgs.keys():
                OrganizationOption.objects.get_all_values(org_id)

    with start_span(op="relay_fetch_keys"):
        project_keys = {}
        for key in ProjectKey.objects.filter(project_id__in=project_ids):
            project_keys.setdefault(key.project_id, []).append(key)

    metrics.timing("relay_project_configs.projects_requested", len(project_ids))
    metrics.timing("relay_project_configs.projects_fetched", len(projects))
    metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

    configs = {}
    for project_id in project_ids:
        configs[str(project_id)] = {"disabled": True}

        project = projects.get(int(project_id))
        if project is None:
            continue

        organization = orgs.get(project.organization_id)
        if organization is None:
            continue

        # Try to prevent organization from being fetched again in quotas.
        project.organization = organization
        project._organization_cache = organization

        with start_span(op="get_config"):
            with metrics.timer("relay_project_configs.get_config.duration"):
                project_config = config.get_project_config(
                    project,
                    full_config=full_config_requested,
                    project_keys=project_keys.get(project.id) or [],
                )

        configs[str(project_id)] = project_config.to_dict()

    if full_config_requested:
        projectconfig_cache.set_many(configs)

    return Response({"configs": configs}, status=200)
def is_valid_release(self, value, inputs):
    self.project.update_option(u'sentry:{}'.format(FilterTypes.RELEASES), inputs)
    project_config = get_project_config(self.project.id)
    return is_valid_release(project_config, value)
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    with sentry_sdk.start_span(op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer("tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id
        )

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer(
            "tasks.store.process_event.stacktraces", tags={"from_symbolicate": from_symbolicate}
        ):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if (
        has_changed
        and options.get("processing.can-use-scrubbers")
        and features.has("organizations:datascrubbers-v2", project.organization, actor=None)
    ):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer(
                "tasks.store.datascrubbers.scrub", tags={"from_symbolicate": from_symbolicate}
            ):
                project_config = get_project_config(project)

                new_data = safe_execute(scrub_data, project_config=project_config, event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                "tasks.store.process_event.preprocessors",
                tags={"plugin": plugin.slug, "from_symbolicate": from_symbolicate},
            ):
                processors = safe_execute(
                    plugin.get_event_preprocessors, data=data, _with_transaction=False
                )
                for processor in processors or ():
                    result = safe_execute(processor, data)
                    if result:
                        data = result
                        has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
def is_valid_error_message(self, value, inputs):
    self.project.update_option(u'sentry:{}'.format(FilterTypes.ERROR_MESSAGES), inputs)
    project_config = get_project_config(self.project.id)
    return is_valid_error_message(project_config, value)
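The three helpers above (is_valid_ip, is_valid_release, is_valid_error_message) build a fresh project config after setting the relevant inbound-filter option and then delegate to the filter check; a hedged usage sketch follows, where the concrete patterns and the expectation that matching values are reported as invalid are assumptions about the filter semantics, not taken from the source tests.

# Hypothetical assertions using the helpers above; patterns and expected
# outcomes are illustrative assumptions about inbound-filter behavior.
def test_inbound_filter_helpers_example(self):
    assert self.is_valid_ip("127.0.0.1", ["10.0.0.0/8"])  # not on the blacklist
    assert not self.is_valid_error_message(
        "FilteredException: This is a filtered exception.",
        ["*filtered exception*"],
    )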
def dispatch(self, request, project_id=None, *args, **kwargs):
    helper = None
    try:
        helper = ClientApiHelper(
            agent=request.META.get("HTTP_USER_AGENT"),
            project_id=project_id,
            ip_address=request.META["REMOTE_ADDR"],
        )

        # if the project id is not directly specified get it from the authentication information
        project_id = _get_project_id_from_request(
            project_id, request, self.auth_helper_cls, helper
        )

        project_config = get_project_config(project_id, for_store=True)

        helper.context.bind_project(project_config.project)

        if kafka_publisher is not None:
            self._publish_to_kafka(request, project_config)

        origin = self.auth_helper_cls.origin_from_request(request)

        response = self._dispatch(
            request, helper, project_config, origin=origin, *args, **kwargs
        )
    except APIError as e:
        context = {"error": force_bytes(e.msg, errors="replace")}
        if e.name:
            context["error_name"] = e.name

        response = HttpResponse(
            json.dumps(context), content_type="application/json", status=e.http_status
        )
        # Set X-Sentry-Error as in many cases it is easier to inspect the headers
        response["X-Sentry-Error"] = context["error"]

        if isinstance(e, APIRateLimited) and e.retry_after is not None:
            response["Retry-After"] = six.text_type(int(math.ceil(e.retry_after)))

    except Exception as e:
        # TODO(dcramer): test failures are not outputting the log message
        # here
        if settings.DEBUG:
            content = traceback.format_exc()
        else:
            content = ""

        logger.exception(e)

        response = HttpResponse(content, content_type="text/plain", status=500)

    # TODO(dcramer): it'd be nice if we had an incr_multi method so
    # tsdb could optimize this
    metrics.incr("client-api.all-versions.requests", skip_internal=False)
    metrics.incr(
        "client-api.all-versions.responses.%s" % (response.status_code,), skip_internal=False
    )
    metrics.incr(
        "client-api.all-versions.responses.%sxx" % (six.text_type(response.status_code)[0],),
        skip_internal=False,
    )

    if helper is not None and helper.context is not None and helper.context.version:
        metrics.incr(
            "client-api.v%s.requests" % (helper.context.version,), skip_internal=False
        )
        metrics.incr(
            "client-api.v%s.responses.%s" % (helper.context.version, response.status_code),
            skip_internal=False,
        )
        metrics.incr(
            "client-api.v%s.responses.%sxx"
            % (helper.context.version, six.text_type(response.status_code)[0]),
            skip_internal=False,
        )

    return response
def update_config_cache(generate, organization_id=None, project_id=None, update_reason=None):
    """
    Update the Redis cache for the Relay projectconfig. This task is invoked
    whenever a project/org option has been saved or smart quotas potentially
    caused a change in projectconfig.

    Either organization_id or project_id has to be provided.

    :param organization_id: The organization for which to invalidate configs.
    :param project_id: The project for which to invalidate configs.
    :param generate: If `True`, caches will be eagerly regenerated, not only
        invalidated.
    """
    from sentry.models import Project, ProjectKey, ProjectKeyStatus
    from sentry.relay import projectconfig_cache
    from sentry.relay.config import get_project_config

    if project_id:
        set_current_event_project(project_id)

    if organization_id:
        # Cannot use bind_organization_context here because we do not have a
        # model and don't want to fetch one
        sentry_sdk.set_tag("organization_id", organization_id)

    sentry_sdk.set_tag("update_reason", update_reason)
    sentry_sdk.set_tag("generate", generate)

    # Delete key before generating configs such that we never have an outdated
    # but valid cache.
    #
    # If this was running at the end of the task, it would be more effective
    # against bursts of updates, but introduces a different race where an
    # outdated cache may be used.
    projectconfig_debounce_cache.mark_task_done(project_id, organization_id)

    if project_id:
        projects = [Project.objects.get_from_cache(id=project_id)]
    elif organization_id:
        # XXX(markus): I feel like we should be able to cache this but I don't
        # want to add another method to src/sentry/db/models/manager.py
        projects = Project.objects.filter(organization_id=organization_id)

    project_keys = {}
    for key in ProjectKey.objects.filter(project_id__in=[project.id for project in projects]):
        project_keys.setdefault(key.project_id, []).append(key)

    if generate:
        config_cache = {}
        for project in projects:
            project_config = get_project_config(
                project, project_keys=project_keys.get(project.id, []), full_config=True
            )
            config_cache[project.id] = project_config.to_dict()

            for key in project_keys.get(project.id) or ():
                # XXX(markus): This is currently the cleanest way to get only
                # state for a single projectkey (considering quotas and
                # everything)
                if key.status != ProjectKeyStatus.ACTIVE:
                    continue

                project_config = get_project_config(project, project_keys=[key], full_config=True)
                config_cache[key.public_key] = project_config.to_dict()

        projectconfig_cache.set_many(config_cache)
    else:
        cache_keys_to_delete = []
        for project in projects:
            cache_keys_to_delete.append(project.id)
            for key in project_keys.get(project.id) or ():
                cache_keys_to_delete.append(key.public_key)

        projectconfig_cache.delete_many(cache_keys_to_delete)

    metrics.incr(
        "relay.projectconfig_cache.done",
        tags={"generate": generate, "update_reason": update_reason},
    )