Example #1
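A relay loop that reads events through a SynchronizedConsumer, schedules post_process_group for each parsed payload, and commits offsets every commit_batch_size messages.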
    def relay(self,
              consumer_group,
              commit_log_topic,
              synchronize_commit_group,
              commit_batch_size=100,
              initial_offset_reset='latest'):
        consumer = SynchronizedConsumer(
            bootstrap_servers=self.producer_configuration['bootstrap.servers'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset=initial_offset_reset,
        )

        consumer.subscribe([self.publish_topic])

        offsets = {}

        def commit_offsets():
            consumer.commit(
                offsets=[
                    TopicPartition(topic, partition, offset)
                    for (topic, partition), offset in offsets.items()
                ],
                asynchronous=False,
            )

        try:
            i = 0
            while True:
                message = consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise Exception(error)

                i = i + 1
                offsets[(message.topic(),
                         message.partition())] = message.offset() + 1

                payload = parse_event_message(message.value())
                if payload is not None:
                    post_process_group.delay(**payload)

                if i % commit_batch_size == 0:
                    commit_offsets()
        except KeyboardInterrupt:
            pass

        logger.info('Committing offsets and closing consumer...')

        if offsets:
            commit_offsets()

        consumer.close()
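
The commit log entries that drive this synchronization are ordinary Kafka messages: the key encodes topic, partition, and the committing group, and the value is the committed offset, exactly as the tests below construct them. A minimal sketch of publishing one such entry with confluent_kafka; the broker address, topic, and group names here are illustrative assumptions, not taken from the source:

from confluent_kafka import Producer

# Illustrative broker address and names; substitute your deployment's values.
producer = Producer({'bootstrap.servers': 'localhost:9092'})

topic, partition, offset = 'events', 0, 42
synchronize_commit_group = 'example-commit-group'

# Key format: '<topic>:<partition>:<group>'; value: the next offset to be read.
producer.produce(
    'example-commit-log',
    key='{}:{}:{}'.format(topic, partition, synchronize_commit_group).encode('utf8'),
    value='{}'.format(offset + 1).encode('utf8'),
)
producer.flush(5)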
Example #2
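Verifies that a SynchronizedConsumer assigned to the start of a partition stays paused until the synchronizing group's commit log advances, then releases exactly one message.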
def test_consumer_start_from_partition_start(requires_kafka):
    synchronize_commit_group = "consumer-{}".format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
        "on_delivery": record_message_delivered,
    })

    with create_topic() as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(3):
            producer.produce(topic, "{}".format(i).encode("utf8"))

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # Create the synchronized consumer.
        consumer = SynchronizedConsumer(
            cluster_name="default",
            consumer_group="consumer-{}".format(uuid.uuid1().hex),
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Wait until we have received our assignments.
        for i in range(10):  # this takes a while
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert (
            len(assignments_received) == 1
        ), "expected to receive partition assignment"
        assert {
            (i.topic, i.partition) for i in assignments_received[0]
        } == {(topic, 0)}

        # TODO: Make sure that all partitions remain paused.

        # Make sure that there are no messages ready to consume.
        assert consumer.poll(1) is None

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0]
        producer.produce(
            commit_log_topic,
            key="{}:{}:{}".format(message.topic(), message.partition(),
                                  synchronize_commit_group).encode("utf8"),
            value="{}".format(message.offset() + 1).encode("utf8"),
        )

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        # We should have received a single message.
        # TODO: Can we also assert that the position is unpaused?
        for i in range(5):
            message = consumer.poll(1)
            if message is not None:
                break

        assert message is not None, "no message received"

        expected_message = messages_delivered[topic][0]
        assert message.topic() == expected_message.topic()
        assert message.partition() == expected_message.partition()
        assert message.offset() == expected_message.offset()

        # We should not be able to continue reading into the topic.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
Example #3
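Verifies rebalancing when a second consumer joins a group with no committed offsets of its own: each consumer ends up with one of the two partitions and no further messages are released.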
def test_consumer_rebalance_from_uncommitted_offset(requires_kafka):
    consumer_group = "consumer-{}".format(uuid.uuid1().hex)
    synchronize_commit_group = "consumer-{}".format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
        "on_delivery": record_message_delivered,
    })

    with create_topic(
            partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic,
                             "{}".format(i).encode("utf8"),
                             partition=i % 2)

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        for (topic, partition), offset in {
            (message.topic(), message.partition()): message.offset()
            for message in messages_delivered[topic]
        }.items():
            producer.produce(
                commit_log_topic,
                key="{}:{}:{}".format(topic, partition,
                                      synchronize_commit_group).encode("utf8"),
                value="{}".format(offset + 1).encode("utf8"),
            )

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        consumer_a = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(
            consumer_a,
            [
                lambda message: assignments_received[consumer_a],
                collect_messages_received(4)
            ],
            10,
        )

        assert (
            len(assignments_received[consumer_a]) == 1
        ), "expected to receive partition assignment"
        assert {
            (i.topic, i.partition)
            for i in assignments_received[consumer_a][0]
        } == {(topic, 0), (topic, 1)}
        assignments_received[consumer_a].pop()

        message = consumer_a.poll(1)
        assert (
            message is None
            or message.error().code() == KafkaError._PARTITION_EOF
        ), "there should be no more messages to receive"

        consumer_b = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(
            consumer_a, [lambda message: assignments_received[consumer_a]], 10)

        consume_until_constraints_met(
            consumer_b,
            [
                lambda message: assignments_received[consumer_b],
                collect_messages_received(2)
            ],
            10,
        )

        for consumer in [consumer_a, consumer_b]:
            assert len(assignments_received[consumer][0]) == 1

        message = consumer_a.poll(1)
        assert (
            message is None
            or message.error().code() == KafkaError._PARTITION_EOF
        ), "there should be no more messages to receive"

        message = consumer_b.poll(1)
        assert (
            message is None
            or message.error().code() == KafkaError._PARTITION_EOF
        ), "there should be no more messages to receive"
Example #4
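A post-process forwarder that tracks offsets per owned partition, commits them on revocation and every commit_batch_size messages, and dispatches a post-processing task for each event.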
    def run_post_process_forwarder(self,
                                   consumer_group,
                                   commit_log_topic,
                                   synchronize_commit_group,
                                   commit_batch_size=100,
                                   initial_offset_reset='latest'):
        logger.debug('Starting post-process forwarder...')

        cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]['cluster']
        bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name][
            'bootstrap.servers']

        consumer = SynchronizedConsumer(
            bootstrap_servers=bootstrap_servers,
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset=initial_offset_reset,
        )

        owned_partition_offsets = {}

        def commit(partitions):
            results = consumer.commit(offsets=partitions, asynchronous=False)

            # filter() returns a lazy iterator on Python 3, which is always
            # truthy and has no len(); materialize the errors as a list.
            errors = [i for i in results if i.error is not None]
            if errors:
                raise Exception('Failed to commit %s/%s partitions: %r' %
                                (len(errors), len(partitions), errors))

            return results

        def on_assign(consumer, partitions):
            logger.debug('Received partition assignment: %r', partitions)

            for i in partitions:
                if i.offset == OFFSET_INVALID:
                    updated_offset = None
                elif i.offset < 0:
                    raise Exception(
                        'Received unexpected negative offset during partition assignment: %r'
                        % (i, ))
                else:
                    updated_offset = i.offset

                key = (i.topic, i.partition)
                previous_offset = owned_partition_offsets.get(key, None)
                if previous_offset is not None and previous_offset != updated_offset:
                    logger.warning(
                        'Received new offset for owned partition %r, will overwrite previous stored offset %r with %r.',
                        key, previous_offset, updated_offset)

                owned_partition_offsets[key] = updated_offset

        def on_revoke(consumer, partitions):
            logger.debug('Revoked partition assignment: %r', partitions)

            offsets_to_commit = []

            for i in partitions:
                key = (i.topic, i.partition)

                try:
                    offset = owned_partition_offsets.pop(key)
                except KeyError:
                    logger.warning(
                        'Received unexpected partition revocation for unowned partition: %r',
                        i,
                        exc_info=True)
                    continue

                if offset is None:
                    logger.debug(
                        'Skipping commit of unprocessed partition: %r', i)
                    continue

                offsets_to_commit.append(
                    TopicPartition(i.topic, i.partition, offset))

            if offsets_to_commit:
                logger.debug(
                    'Committing offset(s) for %s revoked partition(s): %r',
                    len(offsets_to_commit), offsets_to_commit)
                commit(offsets_to_commit)

        consumer.subscribe(
            [self.topic],
            on_assign=on_assign,
            on_revoke=on_revoke,
        )

        def commit_offsets():
            offsets_to_commit = []
            for (topic, partition), offset in owned_partition_offsets.items():
                if offset is None:
                    logger.debug(
                        'Skipping commit of unprocessed partition: %r',
                        (topic, partition))
                    continue

                offsets_to_commit.append(
                    TopicPartition(topic, partition, offset))

            if offsets_to_commit:
                logger.debug(
                    'Committing offset(s) for %s owned partition(s): %r',
                    len(offsets_to_commit), offsets_to_commit)
                commit(offsets_to_commit)

        try:
            i = 0
            while True:
                message = consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise Exception(error)

                key = (message.topic(), message.partition())
                if key not in owned_partition_offsets:
                    logger.warning(
                        'Skipping message for unowned partition: %r', key)
                    continue

                i = i + 1
                owned_partition_offsets[key] = message.offset() + 1

                with metrics.timer('eventstream.duration',
                                   instance='get_task_kwargs_for_message'):
                    task_kwargs = get_task_kwargs_for_message(message.value())

                if task_kwargs is not None:
                    with metrics.timer(
                            'eventstream.duration',
                            instance='dispatch_post_process_group_task'):
                        self._dispatch_post_process_group_task(**task_kwargs)

                if i % commit_batch_size == 0:
                    commit_offsets()
        except KeyboardInterrupt:
            pass

        logger.debug('Committing offsets and closing consumer...')
        commit_offsets()

        consumer.close()
Example #5
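Verifies rebalancing when the consumer group already has committed offsets: after a second consumer joins, each partition's owner receives only the messages released through the commit log.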
def test_consumer_rebalance_from_committed_offset(requires_kafka):
    consumer_group = "consumer-{}".format(uuid.uuid1().hex)
    synchronize_commit_group = "consumer-{}".format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
        "on_delivery": record_message_delivered,
    })

    with create_topic(
            partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic,
                             "{}".format(i).encode("utf8"),
                             partition=i % 2)

        assert producer.flush(5) == 0, "producer did not successfully flush queue"

        Consumer({
            "bootstrap.servers": os.environ["SENTRY_KAFKA_HOSTS"],
            "group.id": consumer_group
        }).commit(
            offsets=[
                TopicPartition(message.topic(), message.partition(),
                               message.offset() + 1)
                for message in messages_delivered[topic][:2]
            ],
            asynchronous=False,
        )

        consumer_a = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        # Wait until the first consumer has received its assignments.
        for i in range(10):  # this takes a while
            assert consumer_a.poll(1) is None
            if assignments_received[consumer_a]:
                break

        assert (
            len(assignments_received[consumer_a]) == 1
        ), "expected to receive partition assignment"
        assert {
            (i.topic, i.partition)
            for i in assignments_received[consumer_a][0]
        } == {(topic, 0), (topic, 1)}

        assignments_received[consumer_a].pop()

        consumer_b = SynchronizedConsumer(
            cluster_name="default",
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset="earliest",
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        assignments = {}

        # Wait until *both* consumers have received updated assignments.
        for consumer in [consumer_a, consumer_b]:
            for i in range(10):  # this takes a while
                assert consumer.poll(1) is None
                if assignments_received[consumer]:
                    break

            assert (
                len(assignments_received[consumer]) == 1
            ), "expected to receive partition assignment"
            assert (
                len(assignments_received[consumer][0]) == 1
            ), "expected to have a single partition assignment"

            i = assignments_received[consumer][0][0]
            assignments[(i.topic, i.partition)] = consumer

        assert set(assignments.keys()) == set([(topic, 0), (topic, 1)])

        for expected_message in messages_delivered[topic][2:]:
            consumer = assignments[(expected_message.topic(),
                                    expected_message.partition())]

            # Make sure that there are no messages ready to consume.
            assert consumer.poll(1) is None

            # Move the committed offset forward for our synchronizing group.
            producer.produce(
                commit_log_topic,
                key="{}:{}:{}".format(expected_message.topic(),
                                      expected_message.partition(),
                                      synchronize_commit_group).encode("utf8"),
                value="{}".format(expected_message.offset() +
                                  1).encode("utf8"),
            )

            assert producer.flush(5) == 0, "producer did not successfully flush queue"

            # We should have received a single message.
            # TODO: Can we also assert that the position is unpaused?
            for i in range(5):
                received_message = consumer.poll(1)
                if received_message is not None:
                    break

            assert received_message is not None, "no message received"

            assert received_message.topic() == expected_message.topic()
            assert received_message.partition() == expected_message.partition()
            assert received_message.offset() == expected_message.offset()

            # We should not be able to continue reading into the topic.
            # TODO: Can we assert that the position is paused?
            assert consumer.poll(1) is None
Example #6
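The same uncommitted-offset rebalance scenario as Example #3, from a variant that configures SynchronizedConsumer with bootstrap_servers instead of cluster_name.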
def test_consumer_rebalance_from_uncommitted_offset():
    consumer_group = 'consumer-{}'.format(uuid.uuid1().hex)
    synchronize_commit_group = 'consumer-{}'.format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
        'on_delivery': record_message_delivered,
    })

    with create_topic(
            partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic,
                             '{}'.format(i).encode('utf8'),
                             partition=i % 2)

        assert producer.flush(5) == 0, 'producer did not successfully flush queue'

        for (topic, partition), offset in {
            (message.topic(), message.partition()): message.offset()
            for message in messages_delivered[topic]
        }.items():
            producer.produce(
                commit_log_topic,
                key='{}:{}:{}'.format(
                    topic,
                    partition,
                    synchronize_commit_group,
                ).encode('utf8'),
                value='{}'.format(offset + 1).encode('utf8'),
            )

        assert producer.flush(5) == 0, 'producer did not successfully flush queue'

        consumer_a = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(consumer_a, [
            lambda message: assignments_received[consumer_a],
            collect_messages_received(4),
        ], 10)

        assert (
            len(assignments_received[consumer_a]) == 1
        ), 'expected to receive partition assignment'
        assert {
            (i.topic, i.partition)
            for i in assignments_received[consumer_a][0]
        } == {(topic, 0), (topic, 1)}
        assignments_received[consumer_a].pop()

        message = consumer_a.poll(1)
        assert (
            message is None
            or message.error().code() == KafkaError._PARTITION_EOF
        ), 'there should be no more messages to receive'

        consumer_b = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        consume_until_constraints_met(consumer_a, [
            lambda message: assignments_received[consumer_a],
        ], 10)

        consume_until_constraints_met(consumer_b, [
            lambda message: assignments_received[consumer_b],
            collect_messages_received(2),
        ], 10)

        for consumer in [consumer_a, consumer_b]:
            assert len(assignments_received[consumer][0]) == 1

        message = consumer_a.poll(1)
        assert (
            message is None
            or message.error().code() == KafkaError._PARTITION_EOF
        ), 'there should be no more messages to receive'

        message = consumer_b.poll(1)
        assert (
            message is None
            or message.error().code() == KafkaError._PARTITION_EOF
        ), 'there should be no more messages to receive'
Example #7
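Verifies rebalancing from the start of the partitions: once both consumers hold one partition each, messages are released one at a time as the commit log advances.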
def test_consumer_rebalance_from_partition_start():
    consumer_group = 'consumer-{}'.format(uuid.uuid1().hex)
    synchronize_commit_group = 'consumer-{}'.format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
        'on_delivery': record_message_delivered,
    })

    with create_topic(
            partitions=2) as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(4):
            producer.produce(topic,
                             '{}'.format(i).encode('utf8'),
                             partition=i % 2)

        assert producer.flush(5) == 0, 'producer did not successfully flush queue'

        consumer_a = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        assignments_received = defaultdict(list)

        def on_assign(consumer, assignment):
            assignments_received[consumer].append(assignment)

        consumer_a.subscribe([topic], on_assign=on_assign)

        # Wait until the first consumer has received its assignments.
        for i in range(10):  # this takes a while
            assert consumer_a.poll(1) is None
            if assignments_received[consumer_a]:
                break

        assert (
            len(assignments_received[consumer_a]) == 1
        ), 'expected to receive partition assignment'
        assert {
            (i.topic, i.partition)
            for i in assignments_received[consumer_a][0]
        } == {(topic, 0), (topic, 1)}

        assignments_received[consumer_a].pop()

        consumer_b = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        consumer_b.subscribe([topic], on_assign=on_assign)

        assignments = {}

        # Wait until *both* consumers have received updated assignments.
        for consumer in [consumer_a, consumer_b]:
            for i in range(10):  # this takes a while
                assert consumer.poll(1) is None
                if assignments_received[consumer]:
                    break

            assert (
                len(assignments_received[consumer]) == 1
            ), 'expected to receive partition assignment'
            assert (
                len(assignments_received[consumer][0]) == 1
            ), 'expected to have a single partition assignment'

            i = assignments_received[consumer][0][0]
            assignments[(i.topic, i.partition)] = consumer

        assert set(assignments.keys()) == set([(topic, 0), (topic, 1)])

        for expected_message in messages_delivered[topic]:
            consumer = assignments[(expected_message.topic(),
                                    expected_message.partition())]

            # Make sure that there are no messages ready to consume.
            assert consumer.poll(1) is None

            # Move the committed offset forward for our synchronizing group.
            producer.produce(
                commit_log_topic,
                key='{}:{}:{}'.format(
                    expected_message.topic(),
                    expected_message.partition(),
                    synchronize_commit_group,
                ).encode('utf8'),
                value='{}'.format(expected_message.offset() + 1).encode('utf8'),
            )

            assert producer.flush(5) == 0, 'producer did not successfully flush queue'

            # We should have received a single message.
            # TODO: Can we also assert that the position is unpaused?
            for i in range(5):
                received_message = consumer.poll(1)
                if received_message is not None:
                    break

            assert received_message is not None, 'no message received'

            assert received_message.topic() == expected_message.topic()
            assert received_message.partition() == expected_message.partition()
            assert received_message.offset() == expected_message.offset()

            # We should not be able to continue reading into the topic.
            # TODO: Can we assert that the position is paused?
            assert consumer.poll(1) is None
Example #8
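Verifies that a SynchronizedConsumer resumes from an offset previously committed by its own group, skipping already-processed messages even after the synchronizing group releases them.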
def test_consumer_start_from_committed_offset():
    consumer_group = 'consumer-{}'.format(uuid.uuid1().hex)
    synchronize_commit_group = 'consumer-{}'.format(uuid.uuid1().hex)

    messages_delivered = defaultdict(list)

    def record_message_delivered(error, message):
        assert error is None
        messages_delivered[message.topic()].append(message)

    producer = Producer({
        'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
        'on_delivery': record_message_delivered,
    })

    with create_topic() as topic, create_topic() as commit_log_topic:

        # Produce some messages into the topic.
        for i in range(3):
            producer.produce(topic, '{}'.format(i).encode('utf8'))

        assert producer.flush(5) == 0, 'producer did not successfully flush queue'

        Consumer({
            'bootstrap.servers': os.environ['SENTRY_KAFKA_HOSTS'],
            'group.id': consumer_group,
        }).commit(
            message=messages_delivered[topic][0],
            asynchronous=False,
        )

        # Create the synchronized consumer.
        consumer = SynchronizedConsumer(
            bootstrap_servers=os.environ['SENTRY_KAFKA_HOSTS'],
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset='earliest',
        )

        assignments_received = []

        def on_assign(c, assignment):
            assert c is consumer
            assignments_received.append(assignment)

        consumer.subscribe([topic], on_assign=on_assign)

        # Wait until we have received our assignments.
        for i in range(10):  # this takes a while
            assert consumer.poll(1) is None
            if assignments_received:
                break

        assert (
            len(assignments_received) == 1
        ), 'expected to receive partition assignment'
        assert {
            (i.topic, i.partition) for i in assignments_received[0]
        } == {(topic, 0)}

        # TODO: Make sure that all partitions are paused on assignment.

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][0]
        producer.produce(
            commit_log_topic,
            key='{}:{}:{}'.format(
                message.topic(),
                message.partition(),
                synchronize_commit_group,
            ).encode('utf8'),
            value='{}'.format(message.offset() + 1).encode('utf8'),
        )

        # Make sure that there are no messages ready to consume.
        assert consumer.poll(1) is None

        # Move the committed offset forward for our synchronizing group.
        message = messages_delivered[topic][1]  # second message
        producer.produce(
            commit_log_topic,
            key='{}:{}:{}'.format(
                message.topic(),
                message.partition(),
                synchronize_commit_group,
            ).encode('utf8'),
            value='{}'.format(message.offset() + 1).encode('utf8'),
        )

        assert producer.flush(5) == 0, 'producer did not successfully flush queue'

        # We should have received a single message.
        # TODO: Can we also assert that the position is unpaused?
        for i in range(5):
            message = consumer.poll(1)
            if message is not None:
                break

        assert message is not None, 'no message received'

        expected_message = messages_delivered[topic][1]  # second message
        assert message.topic() == expected_message.topic()
        assert message.partition() == expected_message.partition()
        assert message.offset() == expected_message.offset()

        # We should not be able to continue reading into the topic.
        # TODO: Can we assert that the position is paused?
        assert consumer.poll(1) is None
Example #9
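A streaming variant of the forwarder that shuts down gracefully on SIGINT/SIGTERM and can build task kwargs from Kafka message headers when the post-process-forwarder:kafka-headers option is enabled.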
    def run_streaming_consumer(
        self,
        consumer_group,
        commit_log_topic,
        synchronize_commit_group,
        commit_batch_size=100,
        initial_offset_reset="latest",
    ):
        cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]["cluster"]

        consumer = SynchronizedConsumer(
            cluster_name=cluster_name,
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset=initial_offset_reset,
        )

        owned_partition_offsets = {}

        def commit(partitions):
            results = consumer.commit(offsets=partitions, asynchronous=False)

            errors = [i for i in results if i.error is not None]
            if errors:
                raise Exception(
                    "Failed to commit {}/{} partitions: {!r}".format(
                        len(errors), len(partitions), errors))

            return results

        def on_assign(consumer, partitions):
            logger.info("Received partition assignment: %r", partitions)

            for i in partitions:
                if i.offset == OFFSET_INVALID:
                    updated_offset = None
                elif i.offset < 0:
                    raise Exception(
                        f"Received unexpected negative offset during partition assignment: {i!r}"
                    )
                else:
                    updated_offset = i.offset

                key = (i.topic, i.partition)
                previous_offset = owned_partition_offsets.get(key, None)
                if previous_offset is not None and previous_offset != updated_offset:
                    logger.warning(
                        "Received new offset for owned partition %r, will overwrite previous stored offset %r with %r.",
                        key,
                        previous_offset,
                        updated_offset,
                    )

                owned_partition_offsets[key] = updated_offset

        def on_revoke(consumer, partitions):
            logger.info("Revoked partition assignment: %r", partitions)

            offsets_to_commit = []

            for i in partitions:
                key = (i.topic, i.partition)

                try:
                    offset = owned_partition_offsets.pop(key)
                except KeyError:
                    logger.warning(
                        "Received unexpected partition revocation for unowned partition: %r",
                        i,
                        exc_info=True,
                    )
                    continue

                if offset is None:
                    logger.debug(
                        "Skipping commit of unprocessed partition: %r", i)
                    continue

                offsets_to_commit.append(
                    TopicPartition(i.topic, i.partition, offset))

            if offsets_to_commit:
                logger.debug(
                    "Committing offset(s) for %s revoked partition(s): %r",
                    len(offsets_to_commit),
                    offsets_to_commit,
                )
                commit(offsets_to_commit)

        consumer.subscribe([self.topic],
                           on_assign=on_assign,
                           on_revoke=on_revoke)

        def commit_offsets():
            offsets_to_commit = []
            for (topic, partition), offset in owned_partition_offsets.items():
                if offset is None:
                    logger.debug(
                        "Skipping commit of unprocessed partition: %r",
                        (topic, partition))
                    continue

                offsets_to_commit.append(
                    TopicPartition(topic, partition, offset))

            if offsets_to_commit:
                logger.debug(
                    "Committing offset(s) for %s owned partition(s): %r",
                    len(offsets_to_commit),
                    offsets_to_commit,
                )
                commit(offsets_to_commit)

        shutdown_requested = False

        def handle_shutdown_request(signum: int, frame: Any) -> None:
            nonlocal shutdown_requested
            logger.debug("Received signal %r, requesting shutdown...", signum)
            shutdown_requested = True

        signal.signal(signal.SIGINT, handle_shutdown_request)
        signal.signal(signal.SIGTERM, handle_shutdown_request)

        i = 0
        while not shutdown_requested:
            message = consumer.poll(0.1)
            if message is None:
                continue

            error = message.error()
            if error is not None:
                raise Exception(error)

            key = (message.topic(), message.partition())
            if key not in owned_partition_offsets:
                logger.warning("Skipping message for unowned partition: %r",
                               key)
                continue

            i = i + 1
            owned_partition_offsets[key] = message.offset() + 1

            use_kafka_headers = options.get(
                "post-process-forwarder:kafka-headers")

            if use_kafka_headers is True:
                try:
                    with _sampled_eventstream_timer(
                            instance="get_task_kwargs_for_message_from_headers"
                    ):
                        task_kwargs = get_task_kwargs_for_message_from_headers(
                            message.headers())

                    if task_kwargs is not None:
                        with _sampled_eventstream_timer(
                                instance="dispatch_post_process_group_task"):
                            if task_kwargs["group_id"] is None:
                                metrics.incr(
                                    "eventstream.messages",
                                    tags={
                                        "partition": message.partition(),
                                        "type": "transactions"
                                    },
                                )
                            else:
                                metrics.incr(
                                    "eventstream.messages",
                                    tags={
                                        "partition": message.partition(),
                                        "type": "errors"
                                    },
                                )
                            self._dispatch_post_process_group_task(
                                **task_kwargs)

                except Exception as error:
                    logger.error("Could not forward message: %s",
                                 error,
                                 exc_info=True)
                    self._get_task_kwargs_and_dispatch(message)

            else:
                self._get_task_kwargs_and_dispatch(message)

            if i % commit_batch_size == 0:
                commit_offsets()

        logger.debug("Committing offsets and closing consumer...")
        commit_offsets()

        consumer.close()
Example #10
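An earlier, more compactly formatted version of the post-process forwarder shown in Example #4.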
    def run_post_process_forwarder(self,
                                   consumer_group,
                                   commit_log_topic,
                                   synchronize_commit_group,
                                   commit_batch_size=100,
                                   initial_offset_reset='latest'):
        logger.debug('Starting post-process forwarder...')

        cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]['cluster']
        bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name]['bootstrap.servers']

        consumer = SynchronizedConsumer(
            bootstrap_servers=bootstrap_servers,
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset=initial_offset_reset,
        )

        owned_partition_offsets = {}

        def commit(partitions):
            results = consumer.commit(offsets=partitions, asynchronous=False)

            # filter() returns a lazy iterator on Python 3, which is always
            # truthy and has no len(); materialize the errors as a list.
            errors = [i for i in results if i.error is not None]
            if errors:
                raise Exception(
                    'Failed to commit %s/%s partitions: %r' %
                    (len(errors), len(partitions), errors))

            return results

        def on_assign(consumer, partitions):
            logger.debug('Received partition assignment: %r', partitions)

            for i in partitions:
                if i.offset == OFFSET_INVALID:
                    updated_offset = None
                elif i.offset < 0:
                    raise Exception(
                        'Received unexpected negative offset during partition assignment: %r' %
                        (i,))
                else:
                    updated_offset = i.offset

                key = (i.topic, i.partition)
                previous_offset = owned_partition_offsets.get(key, None)
                if previous_offset is not None and previous_offset != updated_offset:
                    logger.warning(
                        'Received new offset for owned partition %r, will overwrite previous stored offset %r with %r.',
                        key,
                        previous_offset,
                        updated_offset)

                owned_partition_offsets[key] = updated_offset

        def on_revoke(consumer, partitions):
            logger.debug('Revoked partition assignment: %r', partitions)

            offsets_to_commit = []

            for i in partitions:
                key = (i.topic, i.partition)

                try:
                    offset = owned_partition_offsets.pop(key)
                except KeyError:
                    logger.warning(
                        'Received unexpected partition revocation for unowned partition: %r',
                        i,
                        exc_info=True)
                    continue

                if offset is None:
                    logger.debug('Skipping commit of unprocessed partition: %r', i)
                    continue

                offsets_to_commit.append(TopicPartition(i.topic, i.partition, offset))

            if offsets_to_commit:
                logger.debug(
                    'Committing offset(s) for %s revoked partition(s): %r',
                    len(offsets_to_commit),
                    offsets_to_commit)
                commit(offsets_to_commit)

        consumer.subscribe(
            [self.topic],
            on_assign=on_assign,
            on_revoke=on_revoke,
        )

        def commit_offsets():
            offsets_to_commit = []
            for (topic, partition), offset in owned_partition_offsets.items():
                if offset is None:
                    logger.debug('Skipping commit of unprocessed partition: %r', (topic, partition))
                    continue

                offsets_to_commit.append(TopicPartition(topic, partition, offset))

            if offsets_to_commit:
                logger.debug(
                    'Committing offset(s) for %s owned partition(s): %r',
                    len(offsets_to_commit),
                    offsets_to_commit)
                commit(offsets_to_commit)

        try:
            i = 0
            while True:
                message = consumer.poll(0.1)
                if message is None:
                    continue

                error = message.error()
                if error is not None:
                    raise Exception(error)

                key = (message.topic(), message.partition())
                if key not in owned_partition_offsets:
                    logger.warning('Skipping message for unowned partition: %r', key)
                    continue

                i = i + 1
                owned_partition_offsets[key] = message.offset() + 1

                with metrics.timer('eventstream.duration', instance='get_task_kwargs_for_message'):
                    task_kwargs = get_task_kwargs_for_message(message.value())

                if task_kwargs is not None:
                    with metrics.timer('eventstream.duration', instance='dispatch_post_process_group_task'):
                        self._dispatch_post_process_group_task(**task_kwargs)

                if i % commit_batch_size == 0:
                    commit_offsets()
        except KeyboardInterrupt:
            pass

        logger.debug('Committing offsets and closing consumer...')
        commit_offsets()

        consumer.close()