def map_row(self, row: MutableMapping[str, Any]) -> None:
    """Rewrite ``row["category"]`` in place to its API name.

    Rows without a ``"category"`` key are left untouched. Any value that is
    one of ``DataCategory.error_categories()`` is reported as
    ``DataCategory.ERROR``; every other value is converted through
    ``DataCategory(...)`` before taking its ``api_name()``.
    """
    if "category" not in row:
        return

    raw_category = row["category"]
    if raw_category in DataCategory.error_categories():
        resolved = DataCategory.ERROR
    else:
        resolved = DataCategory(raw_category)

    row["category"] = resolved.api_name()
def get_quotas(self, project, key=None, keys=None):
    """Build the list of error-category quotas that apply to ``project``.

    A ``QuotaConfig`` is emitted for the project, its organization and each
    key whose quota lookup returns a non-``None`` limit (element 0 of the
    returned pair; element 1 is used as the window).

    :param project: project whose quotas are resolved.
    :param key: optional single key; its ``project`` attribute is set to
        ``project`` and it is used as the key list when ``keys`` is empty.
    :param keys: optional list of keys to resolve key-level quotas for.
    :returns: list of ``QuotaConfig`` in project, organization, key order.
    """
    if key:
        key.project = project

    def _error_quota(quota, **scope_fields):
        # All quotas built here share the error categories and the
        # (limit, window) pair; only the scope-specific fields differ.
        return QuotaConfig(
            categories=DataCategory.error_categories(),
            limit=quota[0],
            window=quota[1],
            **scope_fields
        )

    configs = []

    project_quota = self.get_project_quota(project)
    if project_quota[0] is not None:
        configs.append(
            _error_quota(
                project_quota,
                id="p",
                scope=QuotaScope.PROJECT,
                scope_id=project.id,
                reason_code="project_quota",
            )
        )

    org_quota = self.get_organization_quota(project.organization)
    if org_quota[0] is not None:
        configs.append(
            _error_quota(
                org_quota,
                id="o",
                scope=QuotaScope.ORGANIZATION,
                scope_id=project.organization.id,
                reason_code="org_quota",
            )
        )

    # Fall back to the single ``key`` (if any) when no key list was given.
    if not keys:
        keys = [key] if key else []

    for project_key in keys:
        key_quota = self.get_key_quota(project_key)
        if key_quota[0] is None:
            continue
        configs.append(
            _error_quota(
                key_quota,
                id="k",
                scope=QuotaScope.KEY,
                scope_id=project_key.id,
                reason_code="key_quota",
            )
        )

    return configs
def build_project_usage_outcomes(start__stop, project):
    """Sum outcome quantities for a project's errors and transactions.

    :param start__stop: ``(start, stop)`` pair bounding the report period.
    :param project: project to report on (``id`` and ``organization_id``
        are used as query filters).
    :returns: 4-tuple ``(accepted errors, dropped errors, accepted
        transactions, dropped transactions)`` where "dropped" means the
        ``Outcome.RATE_LIMITED`` rows. ``Outcome.FILTERED`` rows are part of
        the query but are not included in any returned total.
    """
    start, stop = start__stop

    # XXX(epurkhiser): Tsdb used to use day buckets, where the end would
    # represent a whole day. Snuba queries more accurately thus we must
    # capture the entire last day
    end = stop + timedelta(days=1)

    error_categories = DataCategory.error_categories()

    time_and_scope = [
        Condition(Column("timestamp"), Op.GTE, start),
        Condition(Column("timestamp"), Op.LT, end),
        Condition(Column("project_id"), Op.EQ, project.id),
        Condition(Column("org_id"), Op.EQ, project.organization_id),
    ]
    outcome_and_category = [
        Condition(
            Column("outcome"),
            Op.IN,
            [Outcome.ACCEPTED, Outcome.FILTERED, Outcome.RATE_LIMITED],
        ),
        Condition(
            Column("category"),
            Op.IN,
            [*error_categories, DataCategory.TRANSACTION],
        ),
    ]

    query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("outcome"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=time_and_scope + outcome_and_category,
        groupby=[Column("outcome"), Column("category")],
        granularity=Granularity(ONE_DAY),
    )
    rows = raw_snql_query(query, referrer="reports.outcomes")["data"]

    def total_for(categories, outcome):
        # Add up the "total" column of every row matching category + outcome.
        return sum(
            row["total"]
            for row in rows
            if row["category"] in categories and row["outcome"] == outcome
        )

    transaction_only = (DataCategory.TRANSACTION,)

    accepted_errors = total_for(error_categories, Outcome.ACCEPTED)
    dropped_errors = total_for(error_categories, Outcome.RATE_LIMITED)
    accepted_transactions = total_for(transaction_only, Outcome.ACCEPTED)
    dropped_transactions = total_for(transaction_only, Outcome.RATE_LIMITED)

    return (
        accepted_errors,
        dropped_errors,
        accepted_transactions,
        dropped_transactions,
    )
def resolve_filter(self, raw_filter: Sequence[str]) -> List[DataCategory]:
    """Translate raw category names into a list of ``DataCategory`` values.

    ``DataCategory.ERROR`` expands to every member of
    ``DataCategory.error_categories()`` (DEFAULT, ERROR and SECURITY are
    combined as errors; see relay: py/sentry_relay/consts.py and
    relay-cabi/include/relay.h).

    :raises InvalidField: if a name cannot be parsed by ``DataCategory.parse``.
    :raises InvalidQuery: if ATTACHMENT is combined with any other category.
    :returns: the resolved categories, de-duplicated, in no particular order.
    """
    categories = set()

    for raw_name in raw_filter:
        parsed = DataCategory.parse(raw_name)
        if parsed is None:
            raise InvalidField(f'Invalid category: "{raw_name}"')

        if parsed == DataCategory.ERROR:
            categories.update(DataCategory.error_categories())
        else:
            categories.add(parsed)

    mixes_attachment = len(categories) > 1 and DataCategory.ATTACHMENT in categories
    if mixes_attachment:
        raise InvalidQuery("if filtering by attachment no other category may be present")

    return list(categories)
def tsdb_increments_from_outcome(org_id, project_id, key_id, outcome, reason, category):
    """Yield ``(tsdb model, id)`` pairs that should be incremented for an outcome.

    A ``None`` category is treated as ``DataCategory.ERROR``. Nothing is
    yielded when the category is not in ``DataCategory.event_categories()``.
    Scopes whose id is ``None`` are skipped. Pairs are produced in this
    order: "received" counters, then "blacklisted" (filtered) or "rejected"
    (rate limited) counters, then the per-reason filter stat for the project.
    """
    if category is None:
        category = DataCategory.ERROR

    if category not in DataCategory.event_categories():
        return

    def per_scope(project_model, org_model, key_model):
        # Pair each model with its scope id, skipping scopes without an id.
        scoped = (
            (project_model, project_id),
            (org_model, org_id),
            (key_model, key_id),
        )
        for model, scope_id in scoped:
            if scope_id is not None:
                yield (model, scope_id)

    models = tsdb.models

    if outcome != Outcome.INVALID:
        # This simply preserves old behavior. We never counted invalid events
        # (too large, duplicate, CORS) toward regular `received` counts.
        for increment in per_scope(
            models.project_total_received,
            models.organization_total_received,
            models.key_total_received,
        ):
            yield increment

    if outcome == Outcome.FILTERED:
        for increment in per_scope(
            models.project_total_blacklisted,
            models.organization_total_blacklisted,
            models.key_total_blacklisted,
        ):
            yield increment
    elif outcome == Outcome.RATE_LIMITED:
        for increment in per_scope(
            models.project_total_rejected,
            models.organization_total_rejected,
            models.key_total_rejected,
        ):
            yield increment

    if reason in FILTER_STAT_KEYS_TO_VALUES and project_id is not None:
        yield (FILTER_STAT_KEYS_TO_VALUES[reason], project_id)
def _pull_out_data(jobs, projects): """ A bunch of (probably) CPU bound stuff. """ for job in jobs: job["project_id"] = int(job["project_id"]) data = job["data"] # Pull the toplevel data we're interested in job["culprit"] = get_culprit(data) transaction_name = data.get("transaction") if transaction_name: transaction_name = force_text(transaction_name) job["transaction"] = transaction_name key_id = None if data is None else data.get("key_id") if key_id is not None: key_id = int(key_id) job["key_id"] = key_id job["logger_name"] = logger_name = data.get("logger") job["level"] = level = data.get("level") job["release"] = data.get("release") job["dist"] = data.get("dist") job["environment"] = environment = data.get("environment") job["recorded_timestamp"] = data.get("timestamp") job["event"] = event = _get_event_instance(job["data"], project_id=job["project_id"]) job["data"] = data = event.data.data job["category"] = DataCategory.from_event_type(data.get("type")) job["platform"] = event.platform event._project_cache = projects[job["project_id"]] # Some of the data that are toplevel attributes are duplicated # into tags (logger, level, environment, transaction). These are # different from legacy attributes which are normalized into tags # ahead of time (site, server_name). setdefault_path(data, "tags", value=[]) set_tag(data, "level", level) if logger_name: set_tag(data, "logger", logger_name) if environment: set_tag(data, "environment", environment) if transaction_name: set_tag(data, "transaction", transaction_name) job["received_timestamp"] = job["event"].data.get("received") or float( job["event"].datetime.strftime("%s") )
def get_quotas(self, project, key=None, keys=None):
    """Build the list of quotas that apply to ``project``.

    When the organization lacks the ``organizations:releases-v2`` feature a
    zero-limit organization quota for ``DataCategory.SESSION`` is emitted
    first. After that, a ``QuotaConfig`` over
    ``DataCategory.event_categories()`` is emitted for the project, its
    organization and each key whose quota lookup returns a non-``None``
    limit (element 0 of the returned pair; element 1 is the window).

    :param project: project whose quotas are resolved.
    :param key: optional single key; its ``project`` attribute is set to
        ``project`` and it is used as the key list when ``keys`` is empty.
    :param keys: optional list of keys to resolve key-level quotas for.
    :returns: list of ``QuotaConfig`` objects.
    """
    if key:
        key.project = project

    def _event_quota(quota, **scope_fields):
        # All event quotas share the categories and the (limit, window)
        # pair; only the scope-specific fields differ.
        return QuotaConfig(
            categories=DataCategory.event_categories(),
            limit=quota[0],
            window=quota[1],
            **scope_fields
        )

    configs = []

    sessions_enabled = features.has("organizations:releases-v2", project.organization)
    if not sessions_enabled:
        configs.append(
            QuotaConfig(
                limit=0,
                scope=QuotaScope.ORGANIZATION,
                categories=[DataCategory.SESSION],
                reason_code="sessions_unavailable",
            )
        )

    project_quota = self.get_project_quota(project)
    if project_quota[0] is not None:
        configs.append(
            _event_quota(
                project_quota,
                id="p",
                scope=QuotaScope.PROJECT,
                scope_id=project.id,
                reason_code="project_quota",
            )
        )

    org_quota = self.get_organization_quota(project.organization)
    if org_quota[0] is not None:
        configs.append(
            _event_quota(
                org_quota,
                id="o",
                scope=QuotaScope.ORGANIZATION,
                scope_id=project.organization.id,
                reason_code="org_quota",
            )
        )

    # Fall back to the single ``key`` (if any) when no key list was given.
    if not keys:
        keys = [key] if key else []

    for project_key in keys:
        key_quota = self.get_key_quota(project_key)
        if key_quota[0] is None:
            continue
        configs.append(
            _event_quota(
                key_quota,
                id="k",
                scope=QuotaScope.KEY,
                scope_id=project_key.id,
                reason_code="key_quota",
            )
        )

    return configs
def build_project_series(start__stop, project):
    """Build the daily (resolved, unresolved, transactions) series for a project.

    :param start__stop: ``(start, stop)`` pair bounding the report period.
    :param project: project to report on.
    :returns: the result of merging the error and transaction series; each
        merged value is a ``(resolved, unresolved, transactions)`` tuple.
    """
    start, stop = start__stop
    rollup = ONE_DAY

    resolution, series = tsdb.get_optimal_rollup_series(start, stop, rollup)
    assert resolution == rollup, "resolution does not match requested value"

    # Every series below is cleaned against the same (start, stop, rollup).
    clean = partial(clean_series, start, stop, rollup)

    def zerofill_clean(data):
        # Fill missing buckets with 0 before cleaning.
        return clean(zerofill(data, start, stop, rollup, fill_default=0))

    # Note: this section can be removed
    # Ids of groups resolved within [start, stop).
    issue_ids = project.group_set.filter(status=GroupStatus.RESOLVED,
                                         resolved_at__gte=start,
                                         resolved_at__lt=stop).values_list(
                                             "id", flat=True)

    # TODO: The TSDB calls could be replaced with a SnQL call here
    tsdb_range_resolved = _query_tsdb_groups_chunked(tsdb.get_range, issue_ids,
                                                     start, stop, rollup)
    # Merge the per-group series, seeded with an all-zero series over the
    # timestamps returned by get_optimal_rollup_series.
    resolved_error_series = reduce(
        merge_series,
        map(clean, tsdb_range_resolved.values()),
        clean([(timestamp, 0) for timestamp in series]),
    )
    # end

    # Use outcomes to compute total errors and transactions
    outcomes_query = Query(
        dataset=Dataset.Outcomes.value,
        match=Entity("outcomes"),
        select=[
            Column("time"),
            Column("category"),
            Function("sum", [Column("quantity")], "total"),
        ],
        where=[
            Condition(Column("timestamp"), Op.GTE, start),
            # The upper bound is pushed one day past ``stop``.
            Condition(Column("timestamp"), Op.LT, stop + timedelta(days=1)),
            Condition(Column("project_id"), Op.EQ, project.id),
            Condition(Column("org_id"), Op.EQ, project.organization_id),
            Condition(Column("outcome"), Op.EQ, Outcome.ACCEPTED),
            Condition(
                Column("category"),
                Op.IN,
                [*DataCategory.error_categories(), DataCategory.TRANSACTION],
            ),
        ],
        groupby=[Column("time"), Column("category")],
        granularity=Granularity(rollup),
        orderby=[OrderBy(Column("time"), Direction.ASC)],
    )
    outcome_series = raw_snql_query(outcomes_query, referrer="reports.outcome_series")

    # Several error categories can share a time bucket, so their totals are
    # accumulated per timestamp.
    total_error_series = OrderedDict()
    for v in outcome_series["data"]:
        if v["category"] in DataCategory.error_categories():
            timestamp = int(to_timestamp(parse_snuba_datetime(v["time"])))
            total_error_series[timestamp] = total_error_series.get(
                timestamp, 0) + v["total"]

    total_error_series = zerofill_clean(list(total_error_series.items()))

    # Transactions are a single category, so rows map directly to points.
    transaction_series = [(int(to_timestamp(parse_snuba_datetime(v["time"]))), v["total"])
                          for v in outcome_series["data"]
                          if v["category"] == DataCategory.TRANSACTION]
    transaction_series = zerofill_clean(transaction_series)

    error_series = merge_series(
        resolved_error_series,
        total_error_series,
        lambda resolved, total: (resolved, total - resolved),  # Resolved, Unresolved
    )

    # Format of this series: [(resolved , unresolved, transactions)]
    return merge_series(
        error_series,
        transaction_series,
        lambda errors, transactions: errors + (transactions, ),
    )
SnubaModelQuerySettings = collections.namedtuple(
    # `dataset` - the dataset in Snuba that we want to query
    # `groupby` - the column in Snuba that we want to put in the group by statement
    # `aggregate` - the column in Snuba that we want to run the aggregate function on
    # `conditions` - any additional model specific conditions we want to pass in the query
    "SnubaModelSettings",
    ["dataset", "groupby", "aggregate", "conditions"],
)

# Combine DEFAULT, ERROR, and SECURITY as errors. Outcomes are now recorded per
# category, while these TSDB models and the places they are used assume only errors.
# see relay: py/sentry_relay/consts.py and relay-cabi/include/relay.h
OUTCOMES_CATEGORY_CONDITION = [
    "category",
    "IN",
    DataCategory.error_categories(),
]


class SnubaTSDB(BaseTSDB):
    """
    A time series query interface to Snuba

    Write methods are not supported, as the raw data from which we generate our
    time series is assumed to already exist in snuba.

    Read methods are supported only for models based on group/event data and
    will return empty results for unsupported models.
    """

    # Since transactions are currently (and temporarily) written to Snuba's events storage we need to
def process(self, request, project, key, auth, helper, data, project_config, attachments=None, **kwargs):
    """Normalize an incoming event payload and hand it to ``process_event``.

    An ``Outcome.INVALID`` outcome is tracked before each rejection.

    :raises APIError: when ``data`` is empty, or when normalization raises
        ``ProcessingErrorInvalidTransaction`` (only the first line of that
        error's text is surfaced).
    :raises APIForbidden: when the normalized event serializes to more than
        10000000 characters of JSON.
    :returns: whatever ``process_event`` returns.
    """
    disable_transaction_events()
    metrics.incr("events.total", skip_internal=False)

    project_id = project_config.project_id
    organization_id = project_config.organization_id

    def track_invalid(reason, **extra):
        # Every rejection in this method is recorded as an INVALID outcome.
        track_outcome(organization_id, project_id, key.id, Outcome.INVALID, reason, **extra)

    if not data:
        track_invalid("no_data")
        raise APIError("No JSON data was found")

    request_meta = request.META
    remote_addr = request_meta["REMOTE_ADDR"]

    event_manager = EventManager(
        data,
        project=project,
        key=key,
        auth=auth,
        client_ip=remote_addr,
        user_agent=helper.context.agent,
        version=auth.version,
        content_encoding=request_meta.get("HTTP_CONTENT_ENCODING", ""),
        project_config=project_config,
    )
    # Drop our reference to the raw payload; the manager owns it from here.
    del data

    self.pre_normalize(event_manager, helper)

    try:
        event_manager.normalize()
    except ProcessingErrorInvalidTransaction as e:
        track_invalid("invalid_transaction", category=DataCategory.TRANSACTION)
        first_line = six.text_type(e).split("\n", 1)[0]
        raise APIError(first_line)

    dict_data = dict(event_manager.get_data())
    data_size = len(json.dumps(dict_data))

    max_event_size = 10000000
    if data_size > max_event_size:
        metrics.timing("events.size.rejected", data_size)
        track_invalid(
            "too_large",
            event_id=dict_data.get("event_id"),
            category=DataCategory.from_event_type(dict_data.get("type")),
        )
        raise APIForbidden("Event size exceeded 10MB after normalization.")

    metrics.timing("events.size.data.post_storeendpoint", data_size)

    return process_event(
        event_manager, project, key, remote_addr, helper, attachments, project_config
    )
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    """Run filtering, rate limiting and de-duplication, then store the event.

    An outcome is tracked for every rejection path, using the data category
    derived from the event's ``type``.

    :returns: the event id.
    :raises APIForbidden: when the event is filtered (unless the
        ``store.lie-about-filter-status`` option is set, in which case the
        event id is returned instead), or when the event id is already
        present in the cache.
    :raises APIRateLimited: when the rate limiter reports the event as limited.
    """
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    # The manager is not needed past this point.
    del event_manager

    event_id = data["event_id"]
    data_category = DataCategory.from_event_type(data.get("type"))

    if should_filter:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
            category=data_category,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)

        # relay will no longer be able to provide information about filter
        # status so to see the impact we're adding a way to turn on relay
        # like behavior here.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited, project=project, key=key, _with_transaction=False)
    # A plain boolean result is wrapped so the code below can treat both
    # shapes the same way.
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    # NOTE(review): when ``rate_limit`` is None the RATE_LIMITED outcome and
    # the "events.dropped" metric are recorded, but nothing is raised, so
    # execution continues to the duplicate check and insert below. Confirm
    # whether that matches the intent of the comment above.
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id,
            category=data_category,
        )
        metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False)
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    # XXX(markus): I believe this code is extremely broken:
    #
    # * it practically uses memcached in prod which has no consistency
    #   guarantees (no idea how we don't run into issues there)
    #
    # * a TTL of 1h basically doesn't guarantee any deduplication at all. It
    #   just guarantees a good error message... for one hour.
    if cache.get(cache_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
            category=data_category,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id, ))

    data = scrub_data(project_config, dict(data))

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, "", 60 * 60)  # Cache for 1 hour

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)

    return event_id
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs):
    """
    Saves an event to the database.

    The event payload is taken from ``data`` or, when ``data`` is None and a
    ``cache_key`` is given, loaded from the default cache. On success an
    ACCEPTED outcome is tracked; when the save raises ``HashDiscarded`` the
    quota is refunded and a FILTERED outcome is tracked instead. Cached
    payload (and, conditionally, attachments) are deleted afterwards.
    """
    # Imported here rather than at module level.
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = default_cache.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    # NOTE(review): ``event_type`` is only derived from the payload when it
    # was loaded from the cache above; when ``data`` is passed in directly it
    # stays "none", so the category is computed from "none". Confirm intended.
    data_category = DataCategory.from_event_type(event_type)

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        # NOTE(review): ``data`` is not checked for None here, unlike the
        # neighbouring statements — a missing payload with no project_id
        # would fail on ``.pop`` before reaching the ``if not data`` handling.
        if project_id is None:
            project_id = data.pop("project")

        key_id = None if data is None else data.get("key_id")
        if key_id is not None:
            key_id = int(key_id)
        timestamp = to_datetime(start_time) if start_time is not None else None

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed", tags={
                "reason": "cache",
                "stage": "post"
            }, skip_internal=False)
            return

        with configure_scope() as scope:
            scope.set_tag("project", project_id)

        # Stays None if the save below does not complete; the ``finally``
        # block checks this.
        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(project_id, assume_normalized=True, cache_key=cache_key)

            with metrics.timer("tasks.store.do_save_event.track_outcome"):
                # This is where we can finally say that we have accepted the event.
                mark_signal_sent(event.project.id, event_id)
                track_outcome(
                    event.project.organization_id,
                    event.project.id,
                    key_id,
                    Outcome.ACCEPTED,
                    None,
                    timestamp,
                    event_id,
                    data_category,
                )

        except HashDiscarded:
            project = Project.objects.get_from_cache(id=project_id)
            reason = FilterStatKeys.DISCARDED_HASH
            project_key = None
            # A missing key is tolerated; the refund then runs with key=None.
            try:
                if key_id is not None:
                    project_key = ProjectKey.objects.get_from_cache(id=key_id)
            except ProjectKey.DoesNotExist:
                pass

            quotas.refund(project, key=project_key, timestamp=start_time)
            # This outcome corresponds to the event_discarded signal. The
            # outcomes_consumer generically handles all FILTERED outcomes, but
            # needs to skip this one.
            mark_signal_sent(project_id, event_id)
            track_outcome(
                project.organization_id,
                project_id,
                key_id,
                Outcome.FILTERED,
                reason,
                timestamp,
                event_id,
                data_category,
            )

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    default_cache.delete(cache_key)

                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    # For the unlikely case that we did not manage to persist the
                    # event we also delete the key always.
                    if event is None or features.has(
                            "organizations:event-attachments",
                            event.project.organization,
                            actor=None):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing("events.time-to-process", time() - start_time, instance=data["platform"])
def _process_signal(msg):
    """Send the Django signal that corresponds to one outcome message.

    Messages are skipped (with an ``outcomes_consumer.skip_outcome`` metric)
    when they have no project id, an outcome other than ACCEPTED / FILTERED /
    RATE_LIMITED, no event id, a signal that was already sent, or a project
    that cannot be found. Otherwise the matching signal is sent, the event is
    marked as sent, and lag / sent metrics are recorded.
    """

    def skip(why):
        metrics.incr("outcomes_consumer.skip_outcome", tags={"reason": why})

    project_id = int(msg.get("project_id") or 0)
    if project_id == 0:
        # no project. this is valid, so ignore silently.
        skip("project_zero")
        return

    outcome = int(msg.get("outcome", -1))
    handled_outcomes = (Outcome.ACCEPTED, Outcome.FILTERED, Outcome.RATE_LIMITED)
    if outcome not in handled_outcomes:
        # nothing to do here
        skip("wrong_outcome_type")
        return

    event_id = msg.get("event_id")
    if not event_id:
        skip("missing_event_id")
        return

    if is_signal_sent(project_id=project_id, event_id=event_id):
        # message already processed nothing left to do
        skip("is_signal_sent")
        return

    try:
        project = Project.objects.get_from_cache(id=project_id)
    except Project.DoesNotExist:
        skip("unknown_project")
        logger.error("OutcomesConsumer could not find project with id: %s", project_id)
        return

    reason = msg.get("reason")
    remote_addr = msg.get("remote_addr")
    quantity = msg.get("quantity")
    category = msg.get("category")
    if category is not None:
        category = DataCategory(category)

    # Arguments shared by every signal below.
    shared = {
        "project": project,
        "category": category,
        "quantity": quantity,
        "sender": OutcomesConsumerWorker,
    }

    if outcome == Outcome.ACCEPTED:
        event_saved.send_robust(**shared)
    elif outcome == Outcome.FILTERED:
        if reason == FilterStatKeys.DISCARDED_HASH:
            event_discarded.send_robust(**shared)
        else:
            event_filtered.send_robust(ip=remote_addr, **shared)
    elif outcome == Outcome.RATE_LIMITED:
        event_dropped.send_robust(ip=remote_addr, reason_code=reason, **shared)

    # remember that we sent the signal just in case the processor dies before
    mark_signal_sent(project_id=project_id, event_id=event_id)

    timestamp = msg.get("timestamp")
    if timestamp is not None:
        lag = to_datetime(time.time()) - parse_timestamp(timestamp)
        metrics.timing("outcomes_consumer.timestamp_lag", lag.total_seconds())

    metrics.incr(
        "outcomes_consumer.signal_sent",
        tags={"reason": reason, "outcome": outcome},
    )