Code Example #1
File: run.py Project: ruizhi09/sentry
def outcome_consumer(**options):
    """
    Runs an "outcomes consumer" task.

    The "outcomes consumer" tasks read outcomes from a kafka topic and sends
    signals for some of them.
    """
    from sentry.ingest.outcomes_consumer import get_outcomes_consumer

    get_outcomes_consumer(**options).run()
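
The test examples below rely on several helpers (_setup_outcome_test, _get_outcome, _get_event_id) that are defined elsewhere in the test module and not shown here. For orientation, here is a minimal sketch of what _get_outcome and _get_event_id might look like, assuming outcomes are JSON-serialized Kafka payloads; the field names, enum values, and timestamp format are assumptions, not the verified schema:

import json


class Outcome(object):
    # Hypothetical stand-in for Sentry's Outcome enum;
    # the concrete values here are assumptions.
    ACCEPTED = 0
    FILTERED = 1
    RATE_LIMITED = 2
    INVALID = 3


def _get_event_id(i):
    # Hypothetical: expand a small integer into a 32-character hex event id.
    return "{:032x}".format(i)


def _get_outcome(event_id, project_id, outcome, reason, remote_addr,
                 timestamp=None):
    # Hypothetical sketch of the payload placed on the outcomes topic.
    message = {
        "project_id": project_id,
        "event_id": _get_event_id(event_id),
        "outcome": outcome,
        "reason": reason,
        "remote_addr": remote_addr,
    }
    if timestamp is not None:
        message["timestamp"] = timestamp.isoformat()
    return json.dumps(message)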
Code Example #2
def test_outcome_consumer_ignores_outcomes_already_handled(
        kafka_producer, task_runner, kafka_admin, requires_kafka):
    producer, project_id, topic_name = _setup_outcome_test(
        kafka_producer, kafka_admin)

    group_id = "test-outcome-consumer-1"
    last_event_id = None

    # put a few outcome messages on the kafka topic and mark the first two
    # in the cache as already handled
    for i in range(4):
        msg = _get_outcome(
            event_id=i,
            project_id=project_id,
            outcome=Outcome.FILTERED,
            reason="some_reason",
            remote_addr="127.33.44.{}".format(i),
        )
        if i in (0, 1):
            # pretend that we have already processed this outcome before
            mark_signal_sent(project_id=project_id, event_id=_get_event_id(i))
        else:
            # The last event id is used to check when the outcome consumer is done
            last_event_id = _get_event_id(i)
        # put the outcome on the kafka topic
        producer.produce(topic_name, msg)

    # set up django signal receivers for event_filtered and event_dropped
    event_filtered_sink = []
    event_dropped_sink = []

    def event_filtered_receiver(**kwargs):
        event_filtered_sink.append(kwargs.get("ip"))

    def event_dropped_receiver(**kwargs):
        event_dropped_sink.append("something")

    event_filtered.connect(event_filtered_receiver)
    event_dropped.connect(event_dropped_receiver)

    consumer = get_outcomes_consumer(max_batch_size=1,
                                     max_batch_time=100,
                                     group_id=group_id,
                                     auto_offset_reset="earliest")

    # run the outcome consumer
    with task_runner():
        i = 0
        while (not is_signal_sent(project_id=project_id,
                                  event_id=last_event_id)
               and i < MAX_POLL_ITERATIONS):
            consumer._run_once()
            i += 1

    assert is_signal_sent(project_id=project_id, event_id=last_event_id)

    # verify that signals fired only for the outcomes that were not already
    # marked as handled (events 0 and 1 were skipped)
    assert event_filtered_sink == ["127.33.44.2", "127.33.44.3"]
    assert len(event_dropped_sink) == 0
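
This example (and Example #4 below) exercises the consumer's deduplication markers via mark_signal_sent and is_signal_sent, which are also not shown above. A minimal sketch, assuming they are thin wrappers around the Django cache; the key layout and TTL are assumptions:

from django.core.cache import cache

SIGNAL_SENT_TTL = 3600  # assumed TTL; the real value lives in the consumer module


def _get_signal_sent_cache_key(project_id, event_id):
    # Hypothetical key layout for the "signals already emitted" marker.
    return "outcome-signal-sent:{}:{}".format(project_id, event_id)


def mark_signal_sent(project_id, event_id):
    # Record that signals for this outcome were already emitted, so a
    # replayed Kafka message does not fire them a second time.
    cache.set(_get_signal_sent_cache_key(project_id, event_id), 1,
              SIGNAL_SENT_TTL)


def is_signal_sent(project_id, event_id):
    return cache.get(_get_signal_sent_cache_key(project_id, event_id)) is not None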
Code Example #3
def test_tsdb(kafka_producer, task_runner, kafka_admin, requires_kafka,
              monkeypatch):
    producer, project_id, topic_name = _setup_outcome_test(
        kafka_producer, kafka_admin)

    timestamps = []

    for i in range(2):
        timestamp = timezone.now()
        timestamps.append(timestamp)
        producer.produce(
            topic_name,
            _get_outcome(
                event_id=i,
                project_id=project_id,
                outcome=Outcome.RATE_LIMITED,
                reason="go_away",
                remote_addr="127.0.0.1",
                timestamp=timestamp,
            ),
        )

    # Mark first item as already processed
    mark_tsdb_incremented_many([(project_id, _get_event_id(0))])
    assert cache.get(_get_tsdb_cache_key(project_id,
                                         _get_event_id(0))) is not None
    assert cache.get(_get_tsdb_cache_key(project_id, _get_event_id(1))) is None

    tsdb_increments = []
    monkeypatch.setattr("sentry.tsdb.incr_multi", tsdb_increments.append)

    group_id = "test-outcome-consumer-6"

    consumer = get_outcomes_consumer(max_batch_size=1,
                                     max_batch_time=100,
                                     group_id=group_id,
                                     auto_offset_reset="earliest")

    i = 0

    while not tsdb_increments and i < MAX_POLL_ITERATIONS:
        consumer._run_once()
        i += 1

    assert tsdb_increments == [[
        (tsdb.models.project_total_received, project_id, {
            "timestamp": timestamps[1]
        }),
        (tsdb.models.project_total_rejected, project_id, {
            "timestamp": timestamps[1]
        }),
    ]]

    assert cache.get(_get_tsdb_cache_key(project_id,
                                         _get_event_id(0))) is not None
    assert cache.get(_get_tsdb_cache_key(project_id,
                                         _get_event_id(1))) is not None
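
The TSDB path in this example uses an analogous marker, via _get_tsdb_cache_key and mark_tsdb_incremented_many, so that replayed messages do not double-count rate-limit statistics. A minimal sketch along the same lines as the signal markers above; again the key layout and TTL are assumptions:

from django.core.cache import cache

TSDB_INCREMENTED_TTL = 3600  # assumed TTL


def _get_tsdb_cache_key(project_id, event_id):
    # Hypothetical key layout for the "TSDB already incremented" marker.
    return "outcome-tsdb-incremented:{}:{}".format(project_id, event_id)


def mark_tsdb_incremented_many(items):
    # items: iterable of (project_id, event_id) pairs.
    for project_id, event_id in items:
        cache.set(_get_tsdb_cache_key(project_id, event_id), 1,
                  TSDB_INCREMENTED_TTL)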
Code Example #4
def test_outcome_consumer_remembers_handled_outcomes(kafka_producer,
                                                     task_runner, kafka_admin,
                                                     requires_kafka):
    producer, project_id, topic_name = _setup_outcome_test(
        kafka_producer, kafka_admin)

    group_id = "test-outcome-consumer-3"

    # put a few outcome messages on the kafka topic
    # emit the same outcome twice (simulate the case where the producer goes
    # down without committing the kafka offsets and is restarted)
    for _ in six.moves.range(2):
        msg = _get_outcome(
            event_id=1,
            project_id=project_id,
            outcome=Outcome.FILTERED,
            reason="some_reason",
            remote_addr="127.33.44.1",
        )

        producer.produce(topic_name, msg)

    # set up django signal receivers for event_filtered and event_dropped
    event_filtered_sink = []
    event_dropped_sink = []

    def event_filtered_receiver(**kwargs):
        event_filtered_sink.append(kwargs.get("ip"))

    def event_dropped_receiver(**kwargs):
        event_dropped_sink.append("something")

    event_filtered.connect(event_filtered_receiver)
    event_dropped.connect(event_dropped_receiver)

    consumer = get_outcomes_consumer(max_batch_size=1,
                                     max_batch_time=100,
                                     group_id=group_id,
                                     auto_offset_reset="earliest")

    # run the outcome consumer
    with task_runner():
        i = 0
        while not event_filtered_sink and i < MAX_POLL_ITERATIONS:
            consumer._run_once()
            i += 1

    # verify that the duplicated outcome triggered only one event_filtered signal
    assert len(event_filtered_sink) == 1
    assert event_filtered_sink == ["127.33.44.1"]
    assert len(event_dropped_sink) == 0
Code Example #5
def test_outcome_consumer_handles_rate_limited_outcomes(
        kafka_producer, task_runner, kafka_admin, requires_kafka):
    producer, project_id, topic_name = _setup_outcome_test(
        kafka_producer, kafka_admin)

    group_id = "test-outcome-consumer-5"

    # put a few outcome messages on the kafka topic
    for i in six.moves.range(1, 3):
        msg = _get_outcome(
            event_id=i,
            project_id=project_id,
            outcome=Outcome.RATE_LIMITED,
            reason="reason_{}".format(i),
            remote_addr="127.33.44.{}".format(i),
        )

        producer.produce(topic_name, msg)

    # set up django signal receivers for event_filtered and event_dropped
    event_filtered_sink = []
    event_dropped_sink = []

    def event_filtered_receiver(**kwargs):
        event_filtered_sink.append("something")

    def event_dropped_receiver(**kwargs):
        event_dropped_sink.append(
            (kwargs.get("ip"), kwargs.get("reason_code")))

    event_filtered.connect(event_filtered_receiver)
    event_dropped.connect(event_dropped_receiver)

    consumer = get_outcomes_consumer(max_batch_size=1,
                                     max_batch_time=100,
                                     group_id=group_id,
                                     auto_offset_reset="earliest")

    # run the outcome consumer
    with task_runner():
        i = 0
        while len(event_dropped_sink) < 2 and i < MAX_POLL_ITERATIONS:
            consumer._run_once()
            i += 1

    # verify that only event_dropped signals fired, one per rate-limited outcome
    assert len(event_filtered_sink) == 0
    assert len(event_dropped_sink) == 2
    assert set(event_dropped_sink) == set([("127.33.44.1", "reason_1"),
                                           ("127.33.44.2", "reason_2")])
Code Example #6
def test_outcome_consumer_ignores_invalid_outcomes(kafka_producer, task_runner,
                                                   kafka_admin,
                                                   requires_kafka):
    producer, project_id, topic_name = _setup_outcome_test(
        kafka_producer, kafka_admin)

    group_id = "test-outcome-consumer-2"

    # put a few outcome messages on the kafka topic. Add two FILTERED items so
    # we know when the consumer has reached the end
    for i in range(4):
        msg = _get_outcome(
            event_id=i,
            project_id=project_id,
            outcome=Outcome.INVALID if i < 2 else Outcome.FILTERED,
            reason="some_reason",
            remote_addr="127.33.44.{}".format(i),
        )

        producer.produce(topic_name, msg)

    # set up django signal receivers for event_filtered and event_dropped
    event_filtered_sink = []
    event_dropped_sink = []

    def event_filtered_receiver(**kwargs):
        event_filtered_sink.append(kwargs.get("ip"))

    def event_dropped_receiver(**kwargs):
        event_dropped_sink.append("something")

    event_filtered.connect(event_filtered_receiver)
    event_dropped.connect(event_dropped_receiver)

    consumer = get_outcomes_consumer(max_batch_size=1,
                                     max_batch_time=100,
                                     group_id=group_id,
                                     auto_offset_reset="earliest")

    # run the outcome consumer
    with task_runner():
        i = 0
        while len(event_filtered_sink) < 2 and i < MAX_POLL_ITERATIONS:
            consumer._run_once()
            i += 1

    # verify that the invalid outcomes were ignored and only the FILTERED
    # outcomes triggered signals
    assert event_filtered_sink == ["127.33.44.2", "127.33.44.3"]
    assert len(event_dropped_sink) == 0
Code Example #7
    def run(self, predicate=None):
        if predicate is None:
            predicate = lambda: True

        consumer = get_outcomes_consumer(
            max_batch_size=1,
            max_batch_time=100,
            group_id=_get_next_group_id(),
            auto_offset_reset="earliest",
        )

        with self.task_runner():
            i = 0
            while predicate() and i < MAX_POLL_ITERATIONS:
                consumer._run_once()
                i += 1

        # Verify that we stopped because the predicate was satisfied rather
        # than because we hit the iteration limit (i.e. no timeout)
        assert i < MAX_POLL_ITERATIONS
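
For orientation, this driver might be used from a test roughly as follows. The OutcomesTestFixture name and the predicate are illustrative assumptions; only the run() signature comes from the snippet above:

# Hypothetical usage: keep polling while no event_filtered signal has
# been observed, then assert on the collected signals.
fixture = OutcomesTestFixture(task_runner)  # assumed wrapper owning task_runner
fixture.run(predicate=lambda: not event_filtered_sink)
assert event_filtered_sink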