Esempio n. 1
0
def test_simple_consumer():
    """Produce 100 messages to a fresh single-partition topic and read
    them back in exact offset order with a simple consumer."""
    topic = create_random_topic(1, 1)

    payloads = [str(i).encode("UTF-8") for i in range(100)]

    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    producer.send_messages(topic, *payloads)

    # Start from the earliest offset with auto-commit off, and give up
    # after one idle second so the test cannot hang.
    consumer_config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        auto_commit=False,
        consumer_timeout_ms=1000,
    )
    consumer = KafkaSimpleConsumer(topic, consumer_config)

    with consumer:
        for offset in range(100):
            msg = consumer.get_message()
            assert msg.offset == offset
            assert msg.partition == 0
            assert msg.value == str(offset).encode("UTF-8")
    def _kafka_consumer_config(self):
        """Build the `KafkaConsumerConfig` for the Consumer.

        Notes:
            Deliberately not a `@cached_property`: the cluster_config may
            change during runtime, and users rely on re-evaluation here to
            respond to topology changes.

            `auto_commit` is False so that clients decide exactly when
            their topic offsets are committed via commit_messages(..).
        """
        # TODO(joshszep|DATAPIPE-2143): switch to offset_storage='kafka'
        # after all consumers are migrated
        return KafkaConsumerConfig(
            group_id=self.client_name,
            cluster=self._region_cluster_config,
            auto_offset_reset=self.auto_offset_reset,
            auto_commit=False,
            partitioner_cooldown=self.partitioner_cooldown,
            use_group_sha=self.use_group_sha,
            pre_rebalance_callback=self.pre_rebalance_callback,
            post_rebalance_callback=(
                self._apply_post_rebalance_callback_to_partition
            ),
            offset_storage='dual',
        )
Esempio n. 3
0
def config(cluster, mock_pre_rebalance_cb, mock_post_rebalance_cb):
    """Fixture: a `KafkaConsumerConfig` wired to the given cluster and the
    mocked rebalance callbacks."""
    return KafkaConsumerConfig(
        cluster=cluster,
        group_id='test_group',
        client_id='test_client_id',
        partitioner_cooldown=0.5,
        pre_rebalance_callback=mock_pre_rebalance_cb,
        post_rebalance_callback=mock_post_rebalance_cb,
    )
    def test__acquire_has_no_consumer(self, mock_consumer, cluster,
                                      example_partitions):
        """First _acquire() constructs a consumer for the new partitions."""
        group_config = KafkaConsumerConfig(self.group, cluster)
        group = KafkaConsumerGroup([], group_config)

        group._acquire(example_partitions)
        # A brand-new consumer is built once, with the group's config kwargs.
        mock_consumer.assert_called_once_with(example_partitions,
                                              **group.config)
    def test__should_keep_trying_no_timeout(self, cluster):
        """With consumer_timeout_ms=-1 the group keeps trying forever."""
        group_config = KafkaConsumerConfig(self.group,
                                           cluster,
                                           consumer_timeout_ms=-1)
        group = KafkaConsumerGroup([], group_config)

        # Even a start time far in the past must not trip a timeout.
        started_long_ago = time.time() - 1000
        assert group._should_keep_trying(started_long_ago)
    def test__should_keep_trying_timed_out(self, mock_time, cluster):
        """Once more than consumer_timeout_ms has elapsed, stop trying."""
        # Freeze the mocked clock at t=0 so the elapsed-time comparison is
        # deterministic.
        mock_time.return_value = 0

        group_config = KafkaConsumerConfig(self.group,
                                           cluster,
                                           consumer_timeout_ms=1000)
        group = KafkaConsumerGroup([], group_config)

        started_over_a_second_ago = time.time() - 1.2
        assert not group._should_keep_trying(started_over_a_second_ago)
Esempio n. 7
0
def run_kafka_consumer_group_test(num_consumers, num_partitions):
    """Spin up ``num_consumers`` consumer processes in one consumer group
    against a fresh ``num_partitions``-partition topic, produce 100
    messages, and assert every message is consumed exactly once across
    the whole group.
    """
    topic = create_random_topic(1, num_partitions)
    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
    config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        partitioner_cooldown=5,
        # Commit after every message so a rebalance never replays work.
        auto_commit_interval_messages=1,
    )

    # Cross-process queue that collects every consumed message.
    queue = Queue()

    def create_consumer():
        # Each consumer runs in its own daemon process and drains the topic
        # until ConsumerTimeout signals nothing is left for it.
        def consume():
            consumer = KafkaConsumerGroup([topic], config)
            with consumer:
                while True:
                    try:
                        message = consumer.next()
                        queue.put(message)
                        consumer.task_done(message)
                    except ConsumerTimeout:
                        return

        p = Process(target=consume)
        # Daemonize so stray consumers die with the test process.
        p.daemon = True
        return p

    consumer_processes = [create_consumer() for _ in range(num_consumers)]

    for consumer_process in consumer_processes:
        consumer_process.start()

    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    for i in range(100):
        producer.send_messages(topic, str(i).encode("UTF-8"))

    # wait until all 100 messages have been consumed
    while queue.qsize() < 100:
        time.sleep(0.1)

    received_messages = []
    # Drain the queue; the short timeout tolerates qsize() being only
    # approximate across processes.
    while True:
        try:
            message = queue.get(block=True, timeout=0.5)
        except Empty:
            break
        received_messages.append(int(message.value))

    # Compare sorted: delivery order across partitions/consumers is not
    # guaranteed — only exactly-once delivery of 0..99 is asserted.
    assert [i for i in range(100)] == sorted(received_messages)
Esempio n. 8
0
    def test___eq__(self):
        """Configs compare equal when group, cluster, and consumer options
        match — regardless of dict-key or broker-list ordering."""
        consumer_config = {
            'buffer_size': 1024,
            'auto_commit_every_n': 100,
            'auto_commit_every_t': 20,
            'auto_commit': True,
            'fetch_size_bytes': 4096,
            'max_buffer_size': None,
            'iter_timeout': 120,
        }
        cluster_config = ClusterConfig(
            type='mykafka',
            name='some_cluster',
            broker_list=['kafka-cluster-1:9092', 'kafka-cluster-2:9092'],
            zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,')

        # Re-ordered consumer config dict
        consumer_config_reordered = {
            'fetch_size_bytes': 4096,
            'auto_commit_every_t': 20,
            'auto_commit': True,
            'max_buffer_size': None,
            'buffer_size': 1024,
            'iter_timeout': 120,
            'auto_commit_every_n': 100,
        }
        # Same brokers as cluster_config, deliberately listed in a
        # different order.
        cluster_config_reordered = ClusterConfig(
            type='mykafka',
            name='some_cluster',
            broker_list=['kafka-cluster-2:9092', 'kafka-cluster-1:9092'],
            zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,')

        config1 = KafkaConsumerConfig("some_group", cluster_config,
                                      **consumer_config)
        config2 = KafkaConsumerConfig("some_group", cluster_config,
                                      **consumer_config)
        assert config1 == config2

        # Let's use a re-ordered map with the same config
        config2 = KafkaConsumerConfig("some_group", cluster_config_reordered,
                                      **consumer_config_reordered)
        assert config1 == config2
 def group(self, _, mock_pre_rebalance_cb, mock_post_rebalance_cb):
     """Fixture: a MultiprocessingConsumerGroup built from an inline
     cluster dict and the mocked rebalance callbacks."""
     config = KafkaConsumerConfig(
         cluster={
             'broker_list': ['test_broker:9292'],
             'zookeeper': 'zookeeper_uri1:2181,zookeeper_uri2:2181'
         },
         group_id='test_group',
         client_id='test_client_id',
         # Keep termination fast so tests don't wait on worker shutdown.
         max_termination_timeout_secs=0.1,
         pre_rebalance_callback=mock_pre_rebalance_cb,
         post_rebalance_callback=mock_post_rebalance_cb)
     return MultiprocessingConsumerGroup(self.topics, config, mock.Mock())
    def test__release_retry(self, cluster):
        """_release() retries once on KafkaUnavailableError, then re-raises."""
        group_config = KafkaConsumerConfig(self.group,
                                           cluster,
                                           auto_commit_enable=True)
        group = KafkaConsumerGroup([], group_config)

        failing_consumer = mock.Mock()
        failing_consumer.set_topic_partitions.side_effect = \
            KafkaUnavailableError
        group.consumer = failing_consumer

        with pytest.raises(KafkaUnavailableError):
            group._release({})
        # One initial attempt plus exactly one retry.
        assert failing_consumer.set_topic_partitions.call_count == 2
Esempio n. 11
0
    def test_partitioner_use_sha_false(self, cluster):
        """With use_group_sha=False the ZK group path is the bare group id."""
        group_config = KafkaConsumerConfig(
            cluster=cluster,
            group_id='test_group',
            client_id='test_client_id',
            partitioner_cooldown=0.5,
            use_group_sha=False,
            pre_rebalance_callback=mock.Mock(),
            post_rebalance_callback=mock.Mock(),
        )
        partitioner = Partitioner(
            group_config, self.topics, mock.Mock(), mock.Mock())

        assert partitioner.zk_group_path == '/yelp-kafka/test_group'
Esempio n. 12
0
def get_consumer_config(cluster_type, group_id, **extra):
    """Build a :py:class:`yelp_kafka.config.KafkaConsumerConfig` for the
    local-region kafka cluster at Yelp.

    :param cluster_type: kafka cluster type
        (ex.'scribe' or 'standard').
    :type cluster_type: string
    :param group_id: consumer group id
    :type group_id: string
    :param extra: extra keyword arguments forwarded to the configuration
    :returns: :py:class:`yelp_kafka.config.KafkaConsumerConfig`
    """
    region_cluster = get_region_cluster(cluster_type, group_id)
    return KafkaConsumerConfig(
        group_id=group_id,
        cluster=region_cluster,
        **extra
    )
    def test__acquire_has_consumer(self, cluster, example_partitions,
                                   mock_post_rebalance_cb):
        """_acquire() on an existing consumer only repoints its partitions
        and fires the post-rebalance callback."""
        group_config = KafkaConsumerConfig(
            self.group,
            cluster,
            post_rebalance_callback=mock_post_rebalance_cb)
        group = KafkaConsumerGroup([], group_config)

        group.consumer = mock.Mock()
        group._acquire(example_partitions)

        # No new consumer is built; the existing one is retargeted once.
        group.consumer.set_topic_partitions.assert_called_once_with(
            example_partitions)
        mock_post_rebalance_cb.assert_called_once_with(example_partitions)
    def test__release(self, cluster, example_partitions,
                      mock_pre_rebalance_cb):
        """_release() fires the pre-rebalance callback, commits offsets,
        and drops all partitions."""
        group_config = KafkaConsumerConfig(
            self.group,
            cluster,
            auto_commit_enable=True,
            pre_rebalance_callback=mock_pre_rebalance_cb)
        group = KafkaConsumerGroup([], group_config)

        fake_consumer = mock.Mock()
        group.consumer = fake_consumer
        group._release(example_partitions)

        fake_consumer.commit.assert_called_once_with()
        # Releasing means the consumer ends up with no partitions at all.
        fake_consumer.set_topic_partitions.assert_called_once_with({})
        mock_pre_rebalance_cb.assert_called_once_with(example_partitions)
Esempio n. 15
0
 def test_close_no_commit(self, cluster):
     """close() must not commit offsets when auto_commit is disabled, but
     must still close the underlying kafka client."""
     config = KafkaConsumerConfig(cluster=cluster,
                                  group_id='test_group',
                                  client_id='test_client_id',
                                  auto_commit=False)
     with mock_kafka() as (mock_client, mock_consumer):
         # Spy on commit so we can assert it is never invoked.
         with mock.patch.object(
                 KafkaSimpleConsumer,
                 'commit',
                 autospec=True,
         ) as mock_commit:
             mock_obj = mock_consumer.return_value
             # Mirror auto_commit=False on the mocked kafka consumer.
             mock_obj.auto_commit = False
             consumer = KafkaSimpleConsumer('test_topic', config)
             consumer.connect()
             consumer.close()
             assert not mock_commit.called
             # The client is still closed exactly once on close().
             mock_client.return_value.close.assert_called_once_with()
Esempio n. 16
0
    def test_get_simple_consumer_args(self):
        """get_simple_consumer_args() translates config options into
        kafka-python SimpleConsumer keyword arguments."""
        cluster_config = ClusterConfig(type='mykafka',
                                       name='some_cluster',
                                       broker_list=['kafka:9092'],
                                       zookeeper='zookeeper:2181')

        consumer_config = KafkaConsumerConfig('some_group',
                                              cluster_config,
                                              auto_offset_reset='smallest',
                                              fetch_min_bytes=456,
                                              consumer_timeout_ms=5000)
        args = consumer_config.get_simple_consumer_args()

        assert args['buffer_size'] == MAX_MESSAGE_SIZE_BYTES
        assert args['auto_commit']
        assert args['auto_offset_reset'] == 'smallest'
        # fetch_min_bytes surfaces as fetch_size_bytes; consumer_timeout_ms
        # (milliseconds) surfaces as iter_timeout (seconds).
        assert args['fetch_size_bytes'] == 456
        assert args['iter_timeout'] == 5
Esempio n. 17
0
    def test___ne__(self):
        """Configs compare unequal when the cluster config, the consumer
        options, or the group id differ."""
        consumer_config = {
            'buffer_size': 1024,
            'auto_commit_every_n': 100,
            'auto_commit_every_t': 20,
            'auto_commit': True,
            'fetch_size_bytes': 4096,
            'max_buffer_size': None,
            'iter_timeout': 120,
        }
        cluster_config = ClusterConfig(
            type='mykafka',
            name='some_cluster',
            broker_list=['kafka-cluster-1:9092', 'kafka-cluster-2:9092'],
            zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,')

        # Same keys as consumer_config but with different values.
        consumer_config_1 = {
            'fetch_size_bytes': 496,
            'auto_commit_every_t': 20,
            'auto_commit': True,
            'max_buffer_size': None,
            'buffer_size': 104,
            'iter_timeout': 12,
            'auto_commit_every_n': 10,
        }
        # Differs from cluster_config in broker membership (cluster-4).
        cluster_config_1 = ClusterConfig(
            type='mykafka',
            name='some_cluster',
            broker_list=['kafka-cluster-4:9092', 'kafka-cluster-1:9092'],
            zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,')

        # Different cluster config
        config1 = KafkaConsumerConfig("some_group", cluster_config,
                                      **consumer_config)
        config2 = KafkaConsumerConfig("some_group", cluster_config_1,
                                      **consumer_config)
        assert config1 != config2

        # Different consumer config
        config1 = KafkaConsumerConfig("some_group", cluster_config,
                                      **consumer_config)
        config2 = KafkaConsumerConfig("some_group", cluster_config,
                                      **consumer_config_1)
        assert config1 != config2

        # Different group ID
        config1 = KafkaConsumerConfig("some_group1", cluster_config,
                                      **consumer_config)
        config2 = KafkaConsumerConfig("some_group2", cluster_config,
                                      **consumer_config)
        assert config1 != config2
Esempio n. 18
0
    def test_get_kafka_consumer_config(self):
        """get_kafka_consumer_config() maps config options onto
        kafka-python KafkaConsumer kwargs, filling in defaults."""
        cluster_config = ClusterConfig(type='mykafka',
                                       name='some_cluster',
                                       broker_list=['kafka:9092'],
                                       zookeeper='zookeeper:2181')

        consumer_config = KafkaConsumerConfig('some_group',
                                              cluster_config,
                                              fetch_message_max_bytes=123,
                                              auto_commit=False,
                                              iter_timeout=5)
        kafka_config = consumer_config.get_kafka_consumer_config()

        assert kafka_config['fetch_message_max_bytes'] == 123
        # auto_commit=False surfaces as auto_commit_enable.
        assert kafka_config['auto_commit_enable'] is False
        expected_interval_ms = AUTO_COMMIT_INTERVAL_SECS * 1000
        assert kafka_config['auto_commit_interval_ms'] == expected_interval_ms
        default_socket_timeout = DEFAULT_CONSUMER_CONFIG['socket_timeout_ms']
        assert kafka_config['socket_timeout_ms'] == default_socket_timeout
        # iter_timeout (seconds) surfaces as consumer_timeout_ms.
        assert kafka_config['consumer_timeout_ms'] == 5000
    def test_next(self, mock_consumer, mock_partitioner, cluster):
        """next() raises ConsumerTimeout once consumer_timeout_ms elapses,
        even while the underlying consumer is still blocking."""
        group_config = KafkaConsumerConfig(self.group,
                                           cluster,
                                           consumer_timeout_ms=500)
        group = KafkaConsumerGroup([], group_config)
        group.partitioner = mock_partitioner()
        group.consumer = mock_consumer()

        def slow_next():
            # Block for longer than the group's 500ms timeout before the
            # underlying consumer itself times out.
            time.sleep(1)
            raise ConsumerTimeout()

        group.consumer.next.side_effect = slow_next

        # Since the mocked KafkaConsumer.next takes longer than
        # consumer_timeout_ms, KafkaConsumerGroup must raise ConsumerTimeout.
        with pytest.raises(ConsumerTimeout):
            group.next()

        group.consumer.next.assert_called_once_with()
        group.partitioner.refresh.assert_called_once_with()
 def test__auto_commit_enabled_not_enabled(self, cluster):
     """_auto_commit_enabled() is False when auto_commit_enable=False."""
     group_config = KafkaConsumerConfig(self.group,
                                        cluster,
                                        auto_commit_enable=False)
     group = KafkaConsumerGroup([], group_config)
     assert not group._auto_commit_enabled()