Example #1
0
def ingest_consumer(consumer_types, all_consumer_types, **options):
    """
    Runs an "ingest consumer" task.

    The "ingest consumer" tasks read events from a kafka topic (coming from Relay) and schedules
    process event celery tasks for them
    """
    from sentry.ingest.ingest_consumer import get_ingest_consumer
    from sentry.utils import metrics

    if all_consumer_types:
        if consumer_types:
            raise click.ClickException(
                "Cannot specify --all-consumer types and --consumer-type at the same time"
            )
        else:
            consumer_types = set(ConsumerType.all())

    if not all_consumer_types and not consumer_types:
        raise click.ClickException(
            "Need to specify --all-consumer-types or --consumer-type")

    concurrency = options.pop("concurrency", None)
    if concurrency is not None:
        executor = ThreadPoolExecutor(concurrency)
    else:
        executor = None

    with metrics.global_tags(ingest_consumer_types=",".join(
            sorted(consumer_types)),
                             _all_threads=True):
        get_ingest_consumer(consumer_types=consumer_types,
                            executor=executor,
                            **options).run()
Example #2
0
def ingest_consumer(consumer_type, **options):
    """
    Runs an "ingest consumer" task.

    The "ingest consumer" tasks read events from a kafka topic (coming from Relay) and schedules
    process event celery tasks for them
    """
    from sentry.ingest.ingest_consumer import ConsumerType, get_ingest_consumer
    from sentry.utils import metrics

    if consumer_type == "events":
        consumer_type = ConsumerType.Events
    elif consumer_type == "transactions":
        consumer_type = ConsumerType.Transactions
    elif consumer_type == "attachments":
        consumer_type = ConsumerType.Attachments

    with metrics.global_tags(ingest_consumer_type=consumer_type,
                             _all_threads=True):
        get_ingest_consumer(consumer_type=consumer_type, **options).run()
Example #3
0
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """

    set_current_event_project(project_id)

    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = event_processing_store.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")
            set_current_event_project(project_id)

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed",
                         tags={
                             "reason": "cache",
                             "stage": "post"
                         },
                         skip_internal=False)
            return

        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                manager.save(project_id,
                             assume_normalized=True,
                             start_time=start_time,
                             cache_key=cache_key)
                # Put the updated event back into the cache so that post_process
                # has the most recent data.
                data = manager.get_data()
                if isinstance(data, CANONICAL_TYPES):
                    data = dict(data.items())
                with metrics.timer(
                        "tasks.store.do_save_event.write_processing_cache"):
                    event_processing_store.store(data)
        except HashDiscarded:
            # Delete the event payload from cache since it won't show up in post-processing.
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    event_processing_store.delete_by_key(cache_key)

        finally:
            reprocessing2.mark_event_reprocessed(data)
            if cache_key:
                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing("events.time-to-process",
                               time() - start_time,
                               instance=data["platform"])

            time_synthetic_monitoring_event(data, project_id, start_time)
Example #4
0
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """

    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = default_cache.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")

        key_id = None if data is None else data.get("key_id")
        if key_id is not None:
            key_id = int(key_id)
        timestamp = to_datetime(start_time) if start_time is not None else None

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed",
                         tags={
                             "reason": "cache",
                             "stage": "post"
                         },
                         skip_internal=False)
            return

        with configure_scope() as scope:
            scope.set_tag("project", project_id)

        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(project_id,
                                     assume_normalized=True,
                                     cache_key=cache_key)

            with metrics.timer("tasks.store.do_save_event.track_outcome"):
                # This is where we can finally say that we have accepted the event.
                track_outcome(
                    event.project.organization_id,
                    event.project.id,
                    key_id,
                    Outcome.ACCEPTED,
                    None,
                    timestamp,
                    event_id,
                )

        except HashDiscarded:
            project = Project.objects.get_from_cache(id=project_id)
            reason = FilterStatKeys.DISCARDED_HASH
            project_key = None
            try:
                if key_id is not None:
                    project_key = ProjectKey.objects.get_from_cache(id=key_id)
            except ProjectKey.DoesNotExist:
                pass

            quotas.refund(project, key=project_key, timestamp=start_time)
            # There is no signal supposed to be sent for this particular
            # outcome-reason combination. Prevent the outcome consumer from
            # emitting it for now.
            #
            # XXX(markus): Revisit decision about signals once outcomes consumer is stable.
            mark_signal_sent(project_id, event_id)
            track_outcome(
                project.organization_id,
                project_id,
                key_id,
                Outcome.FILTERED,
                reason,
                timestamp,
                event_id,
            )

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    default_cache.delete(cache_key)

                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    # For the unlikely case that we did not manage to persist the
                    # event we also delete the key always.
                    if event is None or features.has(
                            "organizations:event-attachments",
                            event.project.organization,
                            actor=None):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing("events.time-to-process",
                               time() - start_time,
                               instance=data["platform"])
Example #5
0
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.
    """

    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer("tasks.store.do_save_event.get_cache") as metric_tags:
            data = default_cache.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get("type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr(
                "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
            )
            return

        with configure_scope() as scope:
            scope.set_tag("project", project_id)

        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(
                    project_id, assume_normalized=True, start_time=start_time, cache_key=cache_key
                )

        except HashDiscarded:
            pass

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    default_cache.delete(cache_key)

                with metrics.timer("tasks.store.do_save_event.delete_attachment_cache"):
                    # For the unlikely case that we did not manage to persist the
                    # event we also delete the key always.
                    if event is None or features.has(
                        "organizations:event-attachments", event.project.organization, actor=None
                    ):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    "events.time-to-process", time() - start_time, instance=data["platform"]
                )