Example 1
def _get_task_kwargs(message: Message) -> Optional[Mapping[str, Any]]:
    use_kafka_headers = options.get("post-process-forwarder:kafka-headers")

    if use_kafka_headers:
        try:
            with _sampled_eventstream_timer(instance="get_task_kwargs_for_message_from_headers"):
                return get_task_kwargs_for_message_from_headers(message.headers())
        except Exception as error:
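            # Fall back to parsing the message body when the headers are unusable.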
            logger.error("Could not forward message: %s", error, exc_info=True)
            with metrics.timer(_DURATION_METRIC, instance="get_task_kwargs_for_message"):
                return get_task_kwargs_for_message(message.value())
    else:
        with metrics.timer(_DURATION_METRIC, instance="get_task_kwargs_for_message"):
            return get_task_kwargs_for_message(message.value())
Example 2
def test_get_task_kwargs_for_message_version_1():
    event_data = {
        "project_id": 1,
        "group_id": 2,
        "event_id": "00000000000010008080808080808080",
        "message": "message",
        "platform": "python",
        "datetime": "2018-07-20T21:04:27.600640Z",
        "data": {
            "logentry": {"formatted": "message"},
            "platform": "python",
            "timestamp": 1532120667.60064,
        },
        "extra": {},
        "primary_hash": "49f68a5c8493ec2c0bf489821c21fc3b",
    }

    task_state = {"is_new": True, "is_regression": False, "is_new_group_environment": True}

    kwargs = get_task_kwargs_for_message(json.dumps([1, "insert", event_data, task_state]))
    event = kwargs.pop("event")
    assert event.project_id == 1
    assert event.group_id == 2
    assert event.event_id == "00000000000010008080808080808080"
    assert event.message == "message"
    assert event.platform == "python"
    assert event.datetime == datetime(2018, 7, 20, 21, 4, 27, 600640, tzinfo=pytz.utc)
    assert kwargs.pop("primary_hash") == "49f68a5c8493ec2c0bf489821c21fc3b"

    assert kwargs.pop("is_new") is True
    assert kwargs.pop("is_regression") is False
    assert kwargs.pop("is_new_group_environment") is True

    assert not kwargs, f"unexpected values remaining: {kwargs!r}"
Example 3
    def _get_task_kwargs_and_dispatch(self, message) -> None:
        with metrics.timer("eventstream.duration",
                           instance="get_task_kwargs_for_message"):
            task_kwargs = get_task_kwargs_for_message(message.value())

        if task_kwargs is not None:
            if task_kwargs["group_id"] is None:
                metrics.incr(
                    "eventstream.messages",
                    tags={"partition": message.partition(), "type": "transactions"},
                )
            else:
                metrics.incr(
                    "eventstream.messages",
                    tags={"partition": message.partition(), "type": "errors"},
                )
            with metrics.timer("eventstream.duration",
                               instance="dispatch_post_process_group_task"):
                self._dispatch_post_process_group_task(**task_kwargs)
Example 4
def test_get_task_kwargs_for_message_version_1():
    event_data = {
        'project_id': 1,
        'group_id': 2,
        'event_id': '00000000000010008080808080808080',
        'message': 'message',
        'platform': 'python',
        'datetime': '2018-07-20T21:04:27.600640Z',
        'data': {},
        'extra': {},
        'primary_hash': '49f68a5c8493ec2c0bf489821c21fc3b',
    }

    task_state = {
        'is_new': True,
        'is_sample': False,
        'is_regression': False,
        'is_new_group_environment': True,
    }

    kwargs = get_task_kwargs_for_message(
        json.dumps([1, 'insert', event_data, task_state]))
    event = kwargs.pop('event')
    assert event.project_id == 1
    assert event.group_id == 2
    assert event.event_id == '00000000000010008080808080808080'
    assert event.message == 'message'
    assert event.platform == 'python'
    assert event.datetime == datetime(2018, 7, 20, 21, 4, 27, 600640, tzinfo=pytz.utc)
    assert dict(event.data) == {}

    assert kwargs.pop('primary_hash') == '49f68a5c8493ec2c0bf489821c21fc3b'

    assert kwargs.pop('is_new') is True
    assert kwargs.pop('is_sample') is False
    assert kwargs.pop('is_regression') is False
    assert kwargs.pop('is_new_group_environment') is True

    assert not kwargs, 'unexpected values remaining: {!r}'.format(kwargs)
Example 5
    def run_post_process_forwarder(self,
                                   consumer_group,
                                   commit_log_topic,
                                   synchronize_commit_group,
                                   commit_batch_size=100,
                                   initial_offset_reset='latest'):
        logger.debug('Starting post-process forwarder...')

        cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]['cluster']
        bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name][
            'bootstrap.servers']

        consumer = SynchronizedConsumer(
            bootstrap_servers=bootstrap_servers,
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset=initial_offset_reset,
        )

        owned_partition_offsets = {}

        def commit(partitions):
            results = consumer.commit(offsets=partitions, asynchronous=False)

            # filter() is lazy in Python 3; materialize it so the truthiness
            # check and len() below behave as intended.
            errors = [i for i in results if i.error is not None]
            if errors:
                raise Exception('Failed to commit %s/%s partitions: %r' %
                                (len(errors), len(partitions), errors))

            return results

        def on_assign(consumer, partitions):
            logger.debug('Received partition assignment: %r', partitions)

            for i in partitions:
                if i.offset == OFFSET_INVALID:
                    updated_offset = None
                elif i.offset < 0:
                    raise Exception(
                        'Received unexpected negative offset during partition assignment: %r'
                        % (i, ))
                else:
                    updated_offset = i.offset

                key = (i.topic, i.partition)
                previous_offset = owned_partition_offsets.get(key, None)
                if previous_offset is not None and previous_offset != updated_offset:
                    logger.warning(
                        'Received new offset for owned partition %r, will overwrite previous stored offset %r with %r.',
                        key, previous_offset, updated_offset)

                owned_partition_offsets[key] = updated_offset

        def on_revoke(consumer, partitions):
            logger.debug('Revoked partition assignment: %r', partitions)

            offsets_to_commit = []

            for i in partitions:
                key = (i.topic, i.partition)

                try:
                    offset = owned_partition_offsets.pop(key)
                except KeyError:
                    logger.warning(
                        'Received unexpected partition revocation for unowned partition: %r',
                        i,
                        exc_info=True)
                    continue

                if offset is None:
                    logger.debug(
                        'Skipping commit of unprocessed partition: %r', i)
                    continue

                offsets_to_commit.append(
                    TopicPartition(i.topic, i.partition, offset))

            if offsets_to_commit:
                logger.debug(
                    'Committing offset(s) for %s revoked partition(s): %r',
                    len(offsets_to_commit), offsets_to_commit)
                commit(offsets_to_commit)

        consumer.subscribe(
            [self.topic],
            on_assign=on_assign,
            on_revoke=on_revoke,
        )

        def commit_offsets():
            offsets_to_commit = []
            for (topic, partition), offset in owned_partition_offsets.items():
                if offset is None:
                    logger.debug(
                        'Skipping commit of unprocessed partition: %r',
                        (topic, partition))
                    continue

                offsets_to_commit.append(
                    TopicPartition(topic, partition, offset))

            if offsets_to_commit:
                logger.debug(
                    'Committing offset(s) for %s owned partition(s): %r',
                    len(offsets_to_commit), offsets_to_commit)
                commit(offsets_to_commit)

        try:
            i = 0
            while True:
                message = consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise Exception(error)

                key = (message.topic(), message.partition())
                if key not in owned_partition_offsets:
                    logger.warning(
                        'Skipping message for unowned partition: %r', key)
                    continue

                i = i + 1
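                # Record the next offset to consume; commit_offsets() flushes
                # these positions in batches of commit_batch_size.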
                owned_partition_offsets[key] = message.offset() + 1

                with metrics.timer('eventstream.duration',
                                   instance='get_task_kwargs_for_message'):
                    task_kwargs = get_task_kwargs_for_message(message.value())

                if task_kwargs is not None:
                    with metrics.timer(
                            'eventstream.duration',
                            instance='dispatch_post_process_group_task'):
                        self._dispatch_post_process_group_task(**task_kwargs)

                if i % commit_batch_size == 0:
                    commit_offsets()
        except KeyboardInterrupt:
            pass

        logger.debug('Committing offsets and closing consumer...')
        commit_offsets()

        consumer.close()
Example 6
def test_get_task_kwargs_for_message_version_1_unexpected_operation():
    with pytest.raises(UnexpectedOperation):
        get_task_kwargs_for_message(json.dumps([1, "invalid", {}, {}]))
Example 7
def test_get_task_kwargs_for_message_version_1_unsupported_operation():
    assert get_task_kwargs_for_message(json.dumps([1, "delete", {}])) is None
Example 8
def test_get_task_kwargs_for_message_version_1_skip_consume():
    assert (
        get_task_kwargs_for_message(json.dumps([1, "insert", {}, {"skip_consume": True}])) is None
    )
Example 9
def test_get_task_kwargs_for_message_invalid_version():
    with pytest.raises(InvalidVersion):
        get_task_kwargs_for_message(json.dumps([0, "insert", {}]))
Example 10
def test_get_task_kwargs_for_message_invalid_payload():
    with pytest.raises(InvalidPayload):
        get_task_kwargs_for_message('{"format": "invalid"}')
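
Taken together, the tests above pin down the dispatch contract of get_task_kwargs_for_message. The sketch below is a reconstruction under those tests, not Sentry's implementation: the exception names and the [version, operation, event_data, task_state] envelope come straight from the tests, while the shape of the returned kwargs and the treatment of "delete" as a recognized-but-unsupported operation are assumptions.

import json
from typing import Any, Mapping, Optional


class InvalidPayload(Exception):
    pass


class InvalidVersion(Exception):
    pass


class UnexpectedOperation(Exception):
    pass


def get_task_kwargs_for_message(value: str) -> Optional[Mapping[str, Any]]:
    # The payload is a JSON array: [version, operation, *operation_args].
    try:
        payload = json.loads(value)
        version, operation = payload[0], payload[1]
    except Exception as error:
        raise InvalidPayload(f"unable to parse message payload: {error}")

    if version != 1:
        raise InvalidVersion(f"unsupported version: {version!r}")

    if operation == "insert":
        event_data, task_state = payload[2], payload[3]
        if task_state.get("skip_consume", False):
            return None  # the producer flagged this message as a no-op
        # The real implementation builds an Event object out of event_data;
        # passing the raw dicts through is enough for this sketch.
        return {"event": event_data, **task_state}
    elif operation == "delete":
        # A recognized operation that the forwarder does not act on.
        return None
    else:
        raise UnexpectedOperation(f"unexpected operation: {operation!r}")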
Example 11
    def run_post_process_forwarder(self, consumer_group, commit_log_topic,
                                   synchronize_commit_group, commit_batch_size=100, initial_offset_reset='latest'):
        logger.debug('Starting post-process forwarder...')

        cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]['cluster']
        bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name]['bootstrap.servers']

        consumer = SynchronizedConsumer(
            bootstrap_servers=bootstrap_servers,
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset=initial_offset_reset,
        )

        owned_partition_offsets = {}

        def commit(partitions):
            results = consumer.commit(offsets=partitions, asynchronous=False)

            # filter() is lazy in Python 3; materialize it so the truthiness
            # check and len() below behave as intended.
            errors = [i for i in results if i.error is not None]
            if errors:
                raise Exception(
                    'Failed to commit %s/%s partitions: %r' %
                    (len(errors), len(partitions), errors))

            return results

        def on_assign(consumer, partitions):
            logger.debug('Received partition assignment: %r', partitions)

            for i in partitions:
                if i.offset == OFFSET_INVALID:
                    updated_offset = None
                elif i.offset < 0:
                    raise Exception(
                        'Received unexpected negative offset during partition assignment: %r' %
                        (i,))
                else:
                    updated_offset = i.offset

                key = (i.topic, i.partition)
                previous_offset = owned_partition_offsets.get(key, None)
                if previous_offset is not None and previous_offset != updated_offset:
                    logger.warning(
                        'Received new offset for owned partition %r, will overwrite previous stored offset %r with %r.',
                        key,
                        previous_offset,
                        updated_offset)

                owned_partition_offsets[key] = updated_offset

        def on_revoke(consumer, partitions):
            logger.debug('Revoked partition assignment: %r', partitions)

            offsets_to_commit = []

            for i in partitions:
                key = (i.topic, i.partition)

                try:
                    offset = owned_partition_offsets.pop(key)
                except KeyError:
                    logger.warning(
                        'Received unexpected partition revocation for unowned partition: %r',
                        i,
                        exc_info=True)
                    continue

                if offset is None:
                    logger.debug('Skipping commit of unprocessed partition: %r', i)
                    continue

                offsets_to_commit.append(TopicPartition(i.topic, i.partition, offset))

            if offsets_to_commit:
                logger.debug(
                    'Committing offset(s) for %s revoked partition(s): %r',
                    len(offsets_to_commit),
                    offsets_to_commit)
                commit(offsets_to_commit)

        consumer.subscribe(
            [self.topic],
            on_assign=on_assign,
            on_revoke=on_revoke,
        )

        def commit_offsets():
            offsets_to_commit = []
            for (topic, partition), offset in owned_partition_offsets.items():
                if offset is None:
                    logger.debug('Skipping commit of unprocessed partition: %r', (topic, partition))
                    continue

                offsets_to_commit.append(TopicPartition(topic, partition, offset))

            if offsets_to_commit:
                logger.debug(
                    'Committing offset(s) for %s owned partition(s): %r',
                    len(offsets_to_commit),
                    offsets_to_commit)
                commit(offsets_to_commit)

        try:
            i = 0
            while True:
                message = consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise Exception(error)

                key = (message.topic(), message.partition())
                if key not in owned_partition_offsets:
                    logger.warning('Skipping message for unowned partition: %r', key)
                    continue

                i = i + 1
                owned_partition_offsets[key] = message.offset() + 1

                with metrics.timer('eventstream.duration', instance='get_task_kwargs_for_message'):
                    task_kwargs = get_task_kwargs_for_message(message.value())

                if task_kwargs is not None:
                    with metrics.timer('eventstream.duration', instance='dispatch_post_process_group_task'):
                        self._dispatch_post_process_group_task(**task_kwargs)

                if i % commit_batch_size == 0:
                    commit_offsets()
        except KeyboardInterrupt:
            pass

        logger.debug('Committing offsets and closing consumer...')
        commit_offsets()

        consumer.close()
Example 12
def test_get_task_kwargs_for_message_version_1_skip_consume():
    assert get_task_kwargs_for_message(
        json.dumps([1, 'insert', {}, {'skip_consume': True}])) is None
Example 13
    def run_post_process_forwarder(
        self,
        consumer_group,
        commit_log_topic,
        synchronize_commit_group,
        commit_batch_size=100,
        initial_offset_reset="latest",
    ):
        logger.debug("Starting post-process forwarder...")

        cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]["cluster"]

        consumer = SynchronizedConsumer(
            cluster_name=cluster_name,
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset=initial_offset_reset,
        )

        owned_partition_offsets = {}

        def commit(partitions):
            results = consumer.commit(offsets=partitions, asynchronous=False)

            errors = [i for i in results if i.error is not None]
            if errors:
                raise Exception(
                    "Failed to commit {}/{} partitions: {!r}".format(
                        len(errors), len(partitions), errors))

            return results

        def on_assign(consumer, partitions):
            logger.info("Received partition assignment: %r", partitions)

            for i in partitions:
                if i.offset == OFFSET_INVALID:
                    updated_offset = None
                elif i.offset < 0:
                    raise Exception(
                        f"Received unexpected negative offset during partition assignment: {i!r}"
                    )
                else:
                    updated_offset = i.offset

                key = (i.topic, i.partition)
                previous_offset = owned_partition_offsets.get(key, None)
                if previous_offset is not None and previous_offset != updated_offset:
                    logger.warning(
                        "Received new offset for owned partition %r, will overwrite previous stored offset %r with %r.",
                        key,
                        previous_offset,
                        updated_offset,
                    )

                owned_partition_offsets[key] = updated_offset

        def on_revoke(consumer, partitions):
            logger.info("Revoked partition assignment: %r", partitions)

            offsets_to_commit = []

            for i in partitions:
                key = (i.topic, i.partition)

                try:
                    offset = owned_partition_offsets.pop(key)
                except KeyError:
                    logger.warning(
                        "Received unexpected partition revocation for unowned partition: %r",
                        i,
                        exc_info=True,
                    )
                    continue

                if offset is None:
                    logger.debug(
                        "Skipping commit of unprocessed partition: %r", i)
                    continue

                offsets_to_commit.append(
                    TopicPartition(i.topic, i.partition, offset))

            if offsets_to_commit:
                logger.debug(
                    "Committing offset(s) for %s revoked partition(s): %r",
                    len(offsets_to_commit),
                    offsets_to_commit,
                )
                commit(offsets_to_commit)

        consumer.subscribe([self.topic],
                           on_assign=on_assign,
                           on_revoke=on_revoke)

        def commit_offsets():
            offsets_to_commit = []
            for (topic, partition), offset in owned_partition_offsets.items():
                if offset is None:
                    logger.debug(
                        "Skipping commit of unprocessed partition: %r",
                        (topic, partition))
                    continue

                offsets_to_commit.append(
                    TopicPartition(topic, partition, offset))

            if offsets_to_commit:
                logger.debug(
                    "Committing offset(s) for %s owned partition(s): %r",
                    len(offsets_to_commit),
                    offsets_to_commit,
                )
                commit(offsets_to_commit)

        shutdown_requested = False

        def handle_shutdown_request(signum: int, frame: Any) -> None:
            nonlocal shutdown_requested
            logger.debug("Received signal %r, requesting shutdown...", signum)
            shutdown_requested = True

        signal.signal(signal.SIGINT, handle_shutdown_request)
        signal.signal(signal.SIGTERM, handle_shutdown_request)

        i = 0
        while not shutdown_requested:
            message = consumer.poll(0.1)
            if message is None:
                continue

            error = message.error()
            if error is not None:
                raise Exception(error)

            key = (message.topic(), message.partition())
            if key not in owned_partition_offsets:
                logger.warning("Skipping message for unowned partition: %r",
                               key)
                continue

            i = i + 1
            owned_partition_offsets[key] = message.offset() + 1

            with metrics.timer("eventstream.duration",
                               instance="get_task_kwargs_for_message"):
                task_kwargs = get_task_kwargs_for_message(message.value())

            if task_kwargs is not None:
                with metrics.timer(
                        "eventstream.duration",
                        instance="dispatch_post_process_group_task"):
                    self._dispatch_post_process_group_task(**task_kwargs)

            if i % commit_batch_size == 0:
                commit_offsets()

        logger.debug("Committing offsets and closing consumer...")
        commit_offsets()

        consumer.close()
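
For context, a hypothetical call site for the forwarder above. The method is shown out of context, so the receiver (eventstream) and every argument value here are illustrative rather than taken from the source:

# Hypothetical invocation; all names and values below are assumptions.
eventstream.run_post_process_forwarder(
    consumer_group="post-process-forwarder",
    commit_log_topic="snuba-commit-log",
    synchronize_commit_group="snuba-consumers",
    commit_batch_size=100,  # commit owned offsets every 100 messages
    initial_offset_reset="latest",
)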