def test_outcome_consumer_remembers_handled_outcomes(kafka_producer, task_runner, kafka_admin):
    producer, project_id, topic_name = _setup_outcome_test(kafka_producer, kafka_admin)

    consumer_group = "test-outcome-consumer-3"

    # put the same outcome message on the kafka topic twice (simulates the case
    # when the producer goes down without committing the kafka offsets and is
    # restarted)
    for _ in six.moves.range(1, 3):
        msg = _get_outcome(
            event_id=1,
            project_id=project_id,
            outcome=Outcome.FILTERED,
            reason="some_reason",
            remote_addr="127.33.44.1",
        )
        producer.produce(topic_name, msg)

    # setup django signals for event_filtered and event_dropped
    event_filtered_sink = []
    event_dropped_sink = []

    def event_filtered_receiver(**kwargs):
        event_filtered_sink.append(kwargs.get("ip"))

    def event_dropped_receiver(**kwargs):
        event_dropped_sink.append("something")

    event_filtered.connect(event_filtered_receiver)
    event_dropped.connect(event_dropped_receiver)

    # run the outcome consumer
    with task_runner():
        run_outcomes_consumer(
            commit_batch_size=2,
            consumer_group=consumer_group,
            max_fetch_time_seconds=0.1,
            initial_offset_reset="earliest",
            is_shutdown_requested=_shutdown_requested(
                max_secs=10, num_outcomes=1, signal_sink=event_filtered_sink
            ),
        )

    # verify that the duplicated outcome was signalled exactly once
    assert len(event_filtered_sink) == 1
    assert event_filtered_sink == ["127.33.44.1"]
    assert len(event_dropped_sink) == 0
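# The `_get_outcome` helper is not shown in this excerpt. The following is a
# minimal sketch of what the call sites above imply, assuming the consumer
# reads JSON payloads with these field names (the real serialization format
# may differ):
import json


def _get_outcome(event_id, project_id, outcome, reason=None, remote_addr=None):
    return json.dumps(
        {
            "project_id": project_id,
            # assumed: `_get_event_id` normalizes the small int used by the
            # tests into the event id format stored with the outcome
            "event_id": _get_event_id(event_id),
            "outcome": int(outcome),  # Outcome is assumed to be int-valued
            "reason": reason,
            "remote_addr": remote_addr,
        }
    )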
def test_outcome_consumer_handles_rate_limited_outcomes(kafka_producer, task_runner, kafka_admin):
    producer, project_id, topic_name = _setup_outcome_test(kafka_producer, kafka_admin)

    consumer_group = "test-outcome-consumer-5"

    # put a few outcome messages on the kafka topic
    for i in six.moves.range(1, 3):
        msg = _get_outcome(
            event_id=i,
            project_id=project_id,
            outcome=Outcome.RATE_LIMITED,
            reason="reason_{}".format(i),
            remote_addr="127.33.44.{}".format(i),
        )
        producer.produce(topic_name, msg)

    # setup django signals for event_filtered and event_dropped
    event_filtered_sink = []
    event_dropped_sink = []

    def event_filtered_receiver(**kwargs):
        event_filtered_sink.append("something")

    def event_dropped_receiver(**kwargs):
        event_dropped_sink.append((kwargs.get("ip"), kwargs.get("reason_code")))

    event_filtered.connect(event_filtered_receiver)
    event_dropped.connect(event_dropped_receiver)

    # run the outcome consumer; rate-limited outcomes fire event_dropped, so
    # the shutdown check must watch the dropped sink for both expected signals
    with task_runner():
        run_outcomes_consumer(
            commit_batch_size=2,
            consumer_group=consumer_group,
            max_fetch_time_seconds=0.1,
            initial_offset_reset="earliest",
            is_shutdown_requested=_shutdown_requested(
                max_secs=10, num_outcomes=2, signal_sink=event_dropped_sink
            ),
        )

    # verify that the rate-limited outcomes fired event_dropped, not event_filtered
    assert len(event_filtered_sink) == 0
    assert len(event_dropped_sink) == 2
    assert event_dropped_sink == [("127.33.44.1", "reason_1"), ("127.33.44.2", "reason_2")]
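# A minimal sketch of the `_shutdown_requested` helper these tests assume: it
# builds the predicate passed as `is_shutdown_requested`, which turns True once
# the sink has collected `num_outcomes` signals or `max_secs` have elapsed, so
# the consumer loop stops instead of polling forever.
import time


def _shutdown_requested(max_secs, num_outcomes, signal_sink):
    start = time.time()

    def is_shutdown_requested():
        # stop when enough signals arrived or the test timed out
        return len(signal_sink) >= num_outcomes or time.time() - start > max_secs

    return is_shutdown_requested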
def test_outcome_consumer_ignores_outcomes_already_handled(kafka_producer, task_runner, kafka_admin):
    producer, project_id, topic_name = _setup_outcome_test(kafka_producer, kafka_admin)

    consumer_group = "test-outcome-consumer-1"

    # put a few outcome messages on the kafka topic and also mark them in the cache
    for i in six.moves.range(1, 3):
        msg = _get_outcome(
            event_id=i,
            project_id=project_id,
            outcome=Outcome.FILTERED,
            reason="some_reason",
            remote_addr="127.33.44.{}".format(i),
        )

        # pretend that we have already processed this outcome before
        mark_signal_sent(project_id=project_id, event_id=_get_event_id(i))

        # put the outcome on the kafka topic
        producer.produce(topic_name, msg)

    # setup django signals for event_filtered and event_dropped
    event_filtered_sink = []
    event_dropped_sink = []

    def event_filtered_receiver(**kwargs):
        event_filtered_sink.append(kwargs.get("ip"))

    def event_dropped_receiver(**kwargs):
        event_dropped_sink.append("something")

    event_filtered.connect(event_filtered_receiver)
    event_dropped.connect(event_dropped_receiver)

    # run the outcome consumer; since no signal should fire for already-handled
    # outcomes, the shutdown check can only trip on the max_secs timeout
    with task_runner():
        run_outcomes_consumer(
            commit_batch_size=2,
            consumer_group=consumer_group,
            max_fetch_time_seconds=0.1,
            initial_offset_reset="earliest",
            is_shutdown_requested=_shutdown_requested(
                max_secs=10, num_outcomes=1, signal_sink=event_filtered_sink
            ),
        )

    # verify that no signal was called (since the events have been previously processed)
    assert len(event_filtered_sink) == 0
    assert len(event_dropped_sink) == 0
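# `mark_signal_sent` lives in sentry.ingest.outcomes_consumer; conceptually it
# records that signals for a (project_id, event_id) pair were already emitted
# so duplicate outcomes are skipped. A rough sketch of the idea, assuming a
# Django cache backend (the real key format, TTL, and lookup helper may differ):
from django.core.cache import cache


def mark_signal_sent(project_id, event_id):
    # remember that this outcome has already been signalled
    cache.set("signal-sent:{}:{}".format(project_id, event_id), True, 3600)


def is_signal_sent(project_id, event_id):
    # the consumer would check this before emitting event_filtered/event_dropped
    return cache.get("signal-sent:{}:{}".format(project_id, event_id)) is not None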
def outcome_consumer(**options):
    """
    Runs an "outcomes consumer" task.

    The "outcomes consumer" task reads outcomes from a Kafka topic and sends
    signals for some of them.
    """
    from sentry.ingest.outcomes_consumer import run_outcomes_consumer

    # the CLI option is given in milliseconds; the consumer API expects seconds
    max_fetch_time_seconds = options["max_fetch_time_ms"] / 1000.0

    run_outcomes_consumer(
        commit_batch_size=options["commit_batch_size"],
        consumer_group=options["group"],
        max_fetch_time_seconds=max_fetch_time_seconds,
        initial_offset_reset=options["initial_offset_reset"],
    )
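# Example (illustrative): driving the undecorated function above directly with
# the option keys it reads. In the real CLI these values come from the Click
# option declarations, which are not shown in this excerpt.
outcome_consumer(
    commit_batch_size=100,
    group="sentry-outcomes",
    max_fetch_time_ms=100,
    initial_offset_reset="earliest",
)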