def test_simple_consumer():
    """Produce 100 messages to a fresh single-partition topic and verify
    they come back in offset order with the expected payloads."""
    topic = create_random_topic(1, 1)
    payloads = [str(i).encode("UTF-8") for i in range(100)]

    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    producer.send_messages(topic, *payloads)

    config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        auto_commit=False,
        consumer_timeout_ms=1000,
    )
    consumer = KafkaSimpleConsumer(topic, config)

    with consumer:
        for expected_offset in range(100):
            message = consumer.get_message()
            # Single partition, so offsets must be sequential from 0.
            assert message.offset == expected_offset
            assert message.partition == 0
            assert message.value == str(expected_offset).encode("UTF-8")
def _kafka_consumer_config(self):
    """Build the `KafkaConsumerConfig` for the Consumer.

    Notes:
        Deliberately not a `@cached_property`: the cluster_config can
        change at runtime, and users may rely on that to react to
        topology changes.

        `auto_commit` is forced to False so clients decide when their
        topic offsets are committed via commit_messages(..).
    """
    consumer_kwargs = dict(
        group_id=self.client_name,
        cluster=self._region_cluster_config,
        auto_offset_reset=self.auto_offset_reset,
        auto_commit=False,
        partitioner_cooldown=self.partitioner_cooldown,
        use_group_sha=self.use_group_sha,
        pre_rebalance_callback=self.pre_rebalance_callback,
        post_rebalance_callback=(
            self._apply_post_rebalance_callback_to_partition
        ),
        # TODO(joshszep|DATAPIPE-2143): switch to offset_storage='kafka'
        # after all consumers are migrated
        offset_storage='dual',
    )
    return KafkaConsumerConfig(**consumer_kwargs)
def config(cluster, mock_pre_rebalance_cb, mock_post_rebalance_cb):
    """Fixture: a KafkaConsumerConfig wired with the mock rebalance callbacks."""
    return KafkaConsumerConfig(
        cluster=cluster,
        group_id='test_group',
        client_id='test_client_id',
        partitioner_cooldown=0.5,
        pre_rebalance_callback=mock_pre_rebalance_cb,
        post_rebalance_callback=mock_post_rebalance_cb,
    )
def test__acquire_has_no_consumer(self, mock_consumer, cluster, example_partitions):
    """When no consumer exists yet, _acquire creates one with the
    acquired partitions and the group's config."""
    group_config = KafkaConsumerConfig(self.group, cluster)
    group = KafkaConsumerGroup([], group_config)

    group._acquire(example_partitions)

    mock_consumer.assert_called_once_with(example_partitions, **group.config)
def test__should_keep_trying_no_timeout(self, cluster):
    """consumer_timeout_ms=-1 disables the timeout: even a start time far
    in the past should keep trying."""
    cfg = KafkaConsumerConfig(self.group, cluster, consumer_timeout_ms=-1)
    group = KafkaConsumerGroup([], cfg)
    started_long_ago = time.time() - 1000
    assert group._should_keep_trying(started_long_ago)
def test__should_keep_trying_timed_out(self, mock_time, cluster):
    """A start time more than consumer_timeout_ms in the past means we
    should stop trying."""
    mock_time.return_value = 0
    cfg = KafkaConsumerConfig(self.group, cluster, consumer_timeout_ms=1000)
    group = KafkaConsumerGroup([], cfg)
    # 1.2 s before "now" -- past the 1000 ms timeout.
    started = time.time() - 1.2
    assert not group._should_keep_trying(started)
def run_kafka_consumer_group_test(num_consumers, num_partitions):
    """End-to-end check that a consumer group drains all produced messages.

    Spawns `num_consumers` KafkaConsumerGroup processes over a fresh topic
    with `num_partitions` partitions, produces 100 messages, then asserts
    every message was consumed exactly once across the whole group.
    """
    topic = create_random_topic(1, num_partitions)
    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
    config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        partitioner_cooldown=5,
        auto_commit_interval_messages=1,
    )
    # Shared queue through which consumer processes hand messages back.
    queue = Queue()

    def create_consumer():
        # Each consumer loops until the group times out waiting for messages.
        def consume():
            consumer = KafkaConsumerGroup([topic], config)
            with consumer:
                while True:
                    try:
                        message = consumer.next()
                        queue.put(message)
                        consumer.task_done(message)
                    except ConsumerTimeout:
                        return

        p = Process(target=consume)
        # Daemonize so stuck consumers don't block test-process exit.
        p.daemon = True
        return p

    consumer_processes = [create_consumer() for _ in range(num_consumers)]

    for consumer_process in consumer_processes:
        consumer_process.start()

    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    for i in range(100):
        producer.send_messages(topic, str(i).encode("UTF-8"))

    # wait until all 100 messages have been consumed
    # NOTE(review): multiprocessing.Queue.qsize() raises NotImplementedError
    # on macOS; presumably this test only runs on Linux -- confirm.
    while queue.qsize() < 100:
        time.sleep(0.1)

    received_messages = []
    while True:
        try:
            message = queue.get(block=True, timeout=0.5)
        except Empty:
            break
        received_messages.append(int(message.value))

    # Delivery order across partitions/consumers is unspecified, so the
    # comparison is done on the sorted payloads.
    assert [i for i in range(100)] == sorted(received_messages)
def test___eq__(self):
    """Configs built from identical settings compare equal, even when the
    kwargs dict and broker list are supplied in a different order."""
    consumer_kwargs = {
        'buffer_size': 1024,
        'auto_commit_every_n': 100,
        'auto_commit_every_t': 20,
        'auto_commit': True,
        'fetch_size_bytes': 4096,
        'max_buffer_size': None,
        'iter_timeout': 120,
    }
    cluster = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka-cluster-1:9092', 'kafka-cluster-2:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )

    # Same settings again, but with dict entries and brokers re-ordered.
    consumer_kwargs_shuffled = {
        'fetch_size_bytes': 4096,
        'auto_commit_every_t': 20,
        'auto_commit': True,
        'max_buffer_size': None,
        'buffer_size': 1024,
        'iter_timeout': 120,
        'auto_commit_every_n': 100,
    }
    cluster_shuffled = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka-cluster-2:9092', 'kafka-cluster-1:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )

    first = KafkaConsumerConfig("some_group", cluster, **consumer_kwargs)
    second = KafkaConsumerConfig("some_group", cluster, **consumer_kwargs)
    assert first == second

    # Re-ordered inputs must still produce an equal config.
    second = KafkaConsumerConfig(
        "some_group", cluster_shuffled, **consumer_kwargs_shuffled)
    assert first == second
def group(self, _, mock_pre_rebalance_cb, mock_post_rebalance_cb):
    """Fixture: a MultiprocessingConsumerGroup wired with mock callbacks."""
    cluster_dict = {
        'broker_list': ['test_broker:9292'],
        'zookeeper': 'zookeeper_uri1:2181,zookeeper_uri2:2181',
    }
    group_config = KafkaConsumerConfig(
        cluster=cluster_dict,
        group_id='test_group',
        client_id='test_client_id',
        max_termination_timeout_secs=0.1,
        pre_rebalance_callback=mock_pre_rebalance_cb,
        post_rebalance_callback=mock_post_rebalance_cb,
    )
    return MultiprocessingConsumerGroup(self.topics, group_config, mock.Mock())
def test__release_retry(self, cluster):
    """When set_topic_partitions keeps failing, _release retries once and
    then lets the error propagate."""
    cfg = KafkaConsumerConfig(self.group, cluster, auto_commit_enable=True)
    group = KafkaConsumerGroup([], cfg)

    fake_consumer = mock.Mock()
    fake_consumer.set_topic_partitions.side_effect = KafkaUnavailableError
    group.consumer = fake_consumer

    with pytest.raises(KafkaUnavailableError):
        group._release({})

    # One initial attempt plus exactly one retry.
    assert fake_consumer.set_topic_partitions.call_count == 2
def test_partitioner_use_sha_false(self, cluster):
    """With use_group_sha=False the ZK group path is the bare group name."""
    cfg = KafkaConsumerConfig(
        cluster=cluster,
        group_id='test_group',
        client_id='test_client_id',
        partitioner_cooldown=0.5,
        use_group_sha=False,
        pre_rebalance_callback=mock.Mock(),
        post_rebalance_callback=mock.Mock(),
    )
    partitioner = Partitioner(cfg, self.topics, mock.Mock(), mock.Mock())
    assert partitioner.zk_group_path == '/yelp-kafka/test_group'
def get_consumer_config(cluster_type, group_id, **extra):
    """Get a :py:class:`yelp_kafka.config.KafkaConsumerConfig` for the local
    region kafka cluster at Yelp.

    :param cluster_type: kafka cluster type (ex.'scribe' or 'standard').
    :type cluster_type: string
    :param group_id: consumer group id
    :type group_id: string
    :param extra: extra arguments to use for creating the configuration
    :returns: :py:class:`yelp_kafka.config.KafkaConsumerConfig`
    """
    region_cluster = get_region_cluster(cluster_type, group_id)
    return KafkaConsumerConfig(
        group_id=group_id,
        cluster=region_cluster,
        **extra
    )
def test__acquire_has_consumer(self, cluster, example_partitions, mock_post_rebalance_cb):
    """With an existing consumer, _acquire repoints it at the new
    partitions and fires the post-rebalance callback."""
    cfg = KafkaConsumerConfig(
        self.group, cluster, post_rebalance_callback=mock_post_rebalance_cb)
    group = KafkaConsumerGroup([], cfg)
    group.consumer = mock.Mock()

    group._acquire(example_partitions)

    group.consumer.set_topic_partitions.assert_called_once_with(
        example_partitions)
    mock_post_rebalance_cb.assert_called_once_with(example_partitions)
def test__release(self, cluster, example_partitions, mock_pre_rebalance_cb):
    """_release commits offsets, clears the partition assignment, and
    fires the pre-rebalance callback."""
    cfg = KafkaConsumerConfig(
        self.group,
        cluster,
        auto_commit_enable=True,
        pre_rebalance_callback=mock_pre_rebalance_cb,
    )
    group = KafkaConsumerGroup([], cfg)
    fake_consumer = mock.Mock()
    group.consumer = fake_consumer

    group._release(example_partitions)

    fake_consumer.commit.assert_called_once_with()
    fake_consumer.set_topic_partitions.assert_called_once_with({})
    mock_pre_rebalance_cb.assert_called_once_with(example_partitions)
def test_close_no_commit(self, cluster):
    """close() must not commit offsets when auto_commit is disabled, but
    must still close the underlying client."""
    cfg = KafkaConsumerConfig(
        cluster=cluster,
        group_id='test_group',
        client_id='test_client_id',
        auto_commit=False,
    )
    with mock_kafka() as (mock_client, mock_consumer):
        with mock.patch.object(
            KafkaSimpleConsumer,
            'commit',
            autospec=True,
        ) as mock_commit:
            mock_consumer.return_value.auto_commit = False
            simple_consumer = KafkaSimpleConsumer('test_topic', cfg)
            simple_consumer.connect()
            simple_consumer.close()

            assert not mock_commit.called
            mock_client.return_value.close.assert_called_once_with()
def test_get_simple_consumer_args(self):
    """get_simple_consumer_args maps config values onto SimpleConsumer kwargs."""
    cluster = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka:9092'],
        zookeeper='zookeeper:2181',
    )
    cfg = KafkaConsumerConfig(
        'some_group',
        cluster,
        auto_offset_reset='smallest',
        fetch_min_bytes=456,
        consumer_timeout_ms=5000,
    )

    args = cfg.get_simple_consumer_args()

    assert args['buffer_size'] == MAX_MESSAGE_SIZE_BYTES
    assert args['auto_commit']
    assert args['auto_offset_reset'] == 'smallest'
    assert args['fetch_size_bytes'] == 456
    # consumer_timeout_ms (5000 ms) becomes iter_timeout in seconds.
    assert args['iter_timeout'] == 5
def test___ne__(self):
    """Configs differing in cluster, consumer settings, or group id
    compare unequal."""
    consumer_kwargs = {
        'buffer_size': 1024,
        'auto_commit_every_n': 100,
        'auto_commit_every_t': 20,
        'auto_commit': True,
        'fetch_size_bytes': 4096,
        'max_buffer_size': None,
        'iter_timeout': 120,
    }
    cluster = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka-cluster-1:9092', 'kafka-cluster-2:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    other_consumer_kwargs = {
        'fetch_size_bytes': 496,
        'auto_commit_every_t': 20,
        'auto_commit': True,
        'max_buffer_size': None,
        'buffer_size': 104,
        'iter_timeout': 12,
        'auto_commit_every_n': 10,
    }
    other_cluster = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka-cluster-4:9092', 'kafka-cluster-1:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )

    # Different cluster config
    first = KafkaConsumerConfig("some_group", cluster, **consumer_kwargs)
    second = KafkaConsumerConfig("some_group", other_cluster, **consumer_kwargs)
    assert first != second

    # Different consumer config
    first = KafkaConsumerConfig("some_group", cluster, **consumer_kwargs)
    second = KafkaConsumerConfig("some_group", cluster, **other_consumer_kwargs)
    assert first != second

    # Different group ID
    first = KafkaConsumerConfig("some_group1", cluster, **consumer_kwargs)
    second = KafkaConsumerConfig("some_group2", cluster, **consumer_kwargs)
    assert first != second
def test_get_kafka_consumer_config(self):
    """get_kafka_consumer_config maps config values onto KafkaConsumer kwargs."""
    cluster = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka:9092'],
        zookeeper='zookeeper:2181',
    )
    cfg = KafkaConsumerConfig(
        'some_group',
        cluster,
        fetch_message_max_bytes=123,
        auto_commit=False,
        iter_timeout=5,
    )

    kafka_config = cfg.get_kafka_consumer_config()

    assert kafka_config['fetch_message_max_bytes'] == 123
    assert kafka_config['auto_commit_enable'] is False
    assert kafka_config['auto_commit_interval_ms'] == (
        AUTO_COMMIT_INTERVAL_SECS * 1000)
    assert kafka_config['socket_timeout_ms'] == (
        DEFAULT_CONSUMER_CONFIG['socket_timeout_ms'])
    # iter_timeout (5 s) becomes consumer_timeout_ms in milliseconds.
    assert kafka_config['consumer_timeout_ms'] == 5000
def test_next(self, mock_consumer, mock_partitioner, cluster):
    """next() surfaces ConsumerTimeout when the inner consumer is slower
    than the group-level consumer_timeout_ms."""
    cfg = KafkaConsumerConfig(self.group, cluster, consumer_timeout_ms=500)
    group = KafkaConsumerGroup([], cfg)
    group.partitioner = mock_partitioner()
    group.consumer = mock_consumer()

    def slow_next():
        # Slower than the 500 ms consumer_timeout_ms configured above.
        time.sleep(1)
        raise ConsumerTimeout()

    group.consumer.next.side_effect = slow_next

    # The mock KafkaConsumer.next takes longer than consumer_timeout_ms,
    # so KafkaConsumerGroup should raise ConsumerTimeout itself.
    with pytest.raises(ConsumerTimeout):
        group.next()

    group.consumer.next.assert_called_once_with()
    group.partitioner.refresh.assert_called_once_with()
def test__auto_commit_enabled_not_enabled(self, cluster):
    """_auto_commit_enabled() is falsy when auto_commit_enable=False."""
    cfg = KafkaConsumerConfig(self.group, cluster, auto_commit_enable=False)
    group = KafkaConsumerGroup([], cfg)
    assert not group._auto_commit_enabled()