def _pull_out_data(jobs, projects):
    """
    Fill every job with the attributes later save steps read from it.

    Each entry of ``jobs`` is mutated in place: the project id is coerced to
    an int, selected top-level payload fields are copied onto the job, the
    event instance is built via ``_get_event_instance`` and the job's
    ``data`` is rebound to that event's underlying data. ``projects`` is
    indexed by the integer project id to seed the event's project cache.

    Nothing is returned.
    """
    for job in jobs:
        project_id = job["project_id"] = int(job["project_id"])
        raw = job["data"]

        # Top-level attributes lifted straight from the incoming payload.
        job["culprit"] = get_culprit(raw)

        transaction_name = raw.get("transaction")
        job["transaction"] = transaction_name = (
            force_text(transaction_name) if transaction_name else transaction_name
        )

        raw_key_id = raw.get("key_id") if raw is not None else None
        job["key_id"] = int(raw_key_id) if raw_key_id is not None else None

        logger_name = job["logger_name"] = raw.get("logger")
        level = job["level"] = raw.get("level")
        job["release"] = raw.get("release")
        job["dist"] = raw.get("dist")
        environment = job["environment"] = raw.get("environment")
        job["recorded_timestamp"] = raw.get("timestamp")

        # From here on the job carries the event's own data container
        # rather than the payload that was handed in.
        event = job["event"] = _get_event_instance(raw, project_id=project_id)
        payload = job["data"] = event.data.data
        job["category"] = DataCategory.from_event_type(payload.get("type"))
        job["platform"] = event.platform
        event._project_cache = projects[project_id]

        # A few top-level attributes (level, logger, environment,
        # transaction) are mirrored into tags. Legacy attributes such as
        # site and server_name are not handled here; they are normalized
        # into tags ahead of time.
        setdefault_path(payload, "tags", value=[])
        set_tag(payload, "level", level)
        mirrored_tags = (
            ("logger", logger_name),
            ("environment", environment),
            ("transaction", transaction_name),
        )
        for tag_key, tag_value in mirrored_tags:
            if tag_value:
                set_tag(payload, tag_key, tag_value)

        # Prefer the stored "received" value; otherwise derive a timestamp
        # from the event's datetime.
        job["received_timestamp"] = event.data.get("received") or float(
            event.datetime.strftime("%s")
        )
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    """
    Run the store pipeline for one already-normalized event.

    Steps, in order: apply the inbound filter verdict, consult the rate
    limiter, reject duplicate event ids, scrub the payload and hand it to
    ``helper.insert_data_to_database``.

    Returns the event id. Raises ``APIForbidden`` for filtered events
    (unless the ``store.lie-about-filter-status`` option is set) and for
    duplicate event ids, and ``APIRateLimited`` when the rate limiter
    reports that the event is limited.
    """
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    payload = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = payload["event_id"]
    data_category = DataCategory.from_event_type(payload.get("type"))

    def record_outcome(outcome, reason):
        # Every outcome emitted here shares the same org/project/key/event.
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            outcome,
            reason,
            event_id=event_id,
            category=data_category,
        )

    if should_filter:
        record_outcome(Outcome.FILTERED, filter_reason)
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)

        # Relay can no longer report the filter status, so this option lets
        # us mimic relay's behavior and measure the impact.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): a failing rate limiter (``None``) is accounted for as a
    # dropped event so that the failure cannot cascade.
    # NOTE(review): in the ``None`` case nothing is raised below, so the
    # event still continues through the rest of this function after the
    # RATE_LIMITED outcome has been recorded.
    limiter_failed = rate_limit is None
    if limiter_failed or rate_limit.is_limited:
        if limiter_failed:
            api_logger.debug("Dropped event due to error with rate limiter")

        reason = rate_limit.reason_code if rate_limit else None
        record_outcome(Outcome.RATE_LIMITED, reason)
        metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False)
        if not limiter_failed:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    # XXX(markus): this duplicate check is believed to be extremely broken:
    #
    # * in prod the cache is effectively memcached, which offers no
    #   consistency guarantees
    #
    # * a one hour TTL does not really guarantee deduplication; it only
    #   guarantees a good error message for that hour
    if cache.get(cache_key) is not None:
        record_outcome(Outcome.INVALID, "duplicate")
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id, ))

    scrubbed = scrub_data(project_config, dict(payload))

    # mutates the scrubbed data (strips a lot of context if not queued)
    helper.insert_data_to_database(scrubbed, start_time=start_time, attachments=attachments)

    one_hour = 60 * 60
    cache.set(cache_key, "", one_hour)

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(
        ip=remote_addr, data=scrubbed, project=project, sender=process_event
    )

    return event_id
def process(
    self, request, project, key, auth, helper, data, project_config, attachments=None, **kwargs
):
    """
    Normalize an incoming payload and pass it on to ``process_event``.

    Raises ``APIError`` when no data was supplied or when normalization
    fails with ``ProcessingErrorInvalidTransaction``, and ``APIForbidden``
    when the normalized payload serializes to more than 10000000
    characters. Each rejection is recorded as an ``Outcome.INVALID``.

    Returns whatever ``process_event`` returns.
    """
    disable_transaction_events()
    metrics.incr("events.total", skip_internal=False)

    project_id = project_config.project_id
    organization_id = project_config.organization_id

    def record_invalid(reason, **extra):
        # All rejections in this method are INVALID outcomes for the same
        # organization / project / key.
        track_outcome(organization_id, project_id, key.id, Outcome.INVALID, reason, **extra)

    if not data:
        record_invalid("no_data")
        raise APIError("No JSON data was found")

    client_ip = request.META["REMOTE_ADDR"]

    event_manager = EventManager(
        data,
        project=project,
        key=key,
        auth=auth,
        client_ip=client_ip,
        user_agent=helper.context.agent,
        version=auth.version,
        content_encoding=request.META.get("HTTP_CONTENT_ENCODING", ""),
        project_config=project_config,
    )
    # Drop our reference to the raw payload; the manager holds it from now on.
    del data

    self.pre_normalize(event_manager, helper)

    try:
        event_manager.normalize()
    except ProcessingErrorInvalidTransaction as e:
        record_invalid("invalid_transaction", category=DataCategory.TRANSACTION)
        # Only the first line of the error message is reported to the client.
        raise APIError(six.text_type(e).split("\n", 1)[0])

    payload = dict(event_manager.get_data())
    payload_size = len(json.dumps(payload))

    max_payload_size = 10000000
    if payload_size > max_payload_size:
        metrics.timing("events.size.rejected", payload_size)
        record_invalid(
            "too_large",
            event_id=payload.get("event_id"),
            category=DataCategory.from_event_type(payload.get("type")),
        )
        raise APIForbidden("Event size exceeded 10MB after normalization.")

    metrics.timing("events.size.data.post_storeendpoint", payload_size)

    return process_event(
        event_manager, project, key, client_ip, helper, attachments, project_config
    )
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.

    :param cache_key: key under which the payload is stored in the default
        cache; used to load ``data`` when it is not passed in, and deleted
        (together with the attachment cache entry) once saving was attempted.
    :param data: the event payload, or ``None`` to load it via ``cache_key``.
    :param start_time: when processing started; used for the outcome
        timestamp, the quota refund and the time-to-process metric.
    :param event_id: id of the event; read from ``data`` when omitted.
    :param project_id: id of the project; popped from ``data`` when omitted.
    :param kwargs: accepted but ignored.

    If no payload is available the function records an ``events.failed``
    metric and returns without saving anything.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    def report_missing_data():
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
        )

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer("tasks.store.do_save_event.get_cache") as metric_tags:
            data = default_cache.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get("type") or "none"

    data_category = DataCategory.from_event_type(event_type)

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            if not data:
                # The payload is gone (e.g. it expired from the cache while
                # the task was queued) and no project id was passed in, so
                # there is nothing to read the project from. Previously this
                # crashed on ``data.pop``; report it like any other missing
                # payload instead.
                report_missing_data()
                return
            project_id = data.pop("project")

        key_id = None if data is None else data.get("key_id")
        if key_id is not None:
            key_id = int(key_id)
        timestamp = to_datetime(start_time) if start_time is not None else None

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            report_missing_data()
            return

        with configure_scope() as scope:
            scope.set_tag("project", project_id)

        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(project_id, assume_normalized=True, cache_key=cache_key)

            with metrics.timer("tasks.store.do_save_event.track_outcome"):
                # This is where we can finally say that we have accepted the event.
                mark_signal_sent(event.project.id, event_id)
                track_outcome(
                    event.project.organization_id,
                    event.project.id,
                    key_id,
                    Outcome.ACCEPTED,
                    None,
                    timestamp,
                    event_id,
                    data_category,
                )

        except HashDiscarded:
            project = Project.objects.get_from_cache(id=project_id)
            reason = FilterStatKeys.DISCARDED_HASH
            project_key = None
            try:
                if key_id is not None:
                    project_key = ProjectKey.objects.get_from_cache(id=key_id)
            except ProjectKey.DoesNotExist:
                # The key is optional for the refund; continue without it.
                pass

            quotas.refund(project, key=project_key, timestamp=start_time)
            # This outcome corresponds to the event_discarded signal.  The
            # outcomes_consumer generically handles all FILTERED outcomes, but
            # needs to skip this one.
            mark_signal_sent(project_id, event_id)
            track_outcome(
                project.organization_id,
                project_id,
                key_id,
                Outcome.FILTERED,
                reason,
                timestamp,
                event_id,
                data_category,
            )

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    default_cache.delete(cache_key)

                with metrics.timer("tasks.store.do_save_event.delete_attachment_cache"):
                    # For the unlikely case that we did not manage to persist the
                    # event we also delete the key always.
                    if event is None or features.has(
                        "organizations:event-attachments", event.project.organization, actor=None
                    ):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    "events.time-to-process", time() - start_time, instance=data["platform"]
                )