def mock_response_logs_parsed():
    """Fixture: discovery log responses already parsed into
    (stream-names, ClusterConfig) pairs."""
    stream_cluster_pairs = [
        (
            ['a.c.stream1'],
            ClusterConfig('type1', 'cluster2', ['mybroker2'], 'zk_hosts/kafka2'),
        ),
        (
            ['a.b.stream1', 'a.b.stream2'],
            ClusterConfig('type1', 'cluster1', ['mybroker'], 'zk_hosts/kafka'),
        ),
    ]
    return stream_cluster_pairs
def test_get_all_clusters(self, mock_yaml):
    """get_all_clusters returns every cluster defined in the topology file."""
    topology = TopologyConfiguration(
        cluster_type='mykafka',
        kafka_topology_path=TEST_BASE_KAFKA,
    )
    expected_clusters = [
        ClusterConfig('mykafka', 'cluster1', ["mybrokerhost1:9092"], "0.1.2.3,0.2.3.4/kafka"),
        ClusterConfig('mykafka', 'cluster2', ["mybrokerhost2:9092"], "0.3.4.5,0.4.5.6/kafka"),
    ]
    # Order of the returned clusters is not part of the contract; sort both sides.
    assert sorted(expected_clusters) == sorted(topology.get_all_clusters())
def test___ne___broker_list(self):
    """ClusterConfigs whose broker lists differ must compare unequal."""
    reference = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list=['kafka-cluster-1:9092', 'kafka-cluster-2:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    # Same type/name/zookeeper, but the second broker differs.
    different_brokers = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list=['kafka-cluster-1:9092', 'kafka-cluster-3:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    assert reference != different_brokers
def test___ne__(self):
    """KafkaConsumerConfig inequality: differing cluster, differing consumer
    settings, or differing group ID each make two configs unequal."""
    base_consumer_kwargs = {
        'buffer_size': 1024,
        'auto_commit_every_n': 100,
        'auto_commit_every_t': 20,
        'auto_commit': True,
        'fetch_size_bytes': 4096,
        'max_buffer_size': None,
        'iter_timeout': 120,
    }
    base_cluster = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka-cluster-1:9092', 'kafka-cluster-2:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    other_consumer_kwargs = {
        'fetch_size_bytes': 496,
        'auto_commit_every_t': 20,
        'auto_commit': True,
        'max_buffer_size': None,
        'buffer_size': 104,
        'iter_timeout': 12,
        'auto_commit_every_n': 10,
    }
    other_cluster = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka-cluster-4:9092', 'kafka-cluster-1:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )

    # Same group, different cluster config.
    assert (
        KafkaConsumerConfig("some_group", base_cluster, **base_consumer_kwargs)
        != KafkaConsumerConfig("some_group", other_cluster, **base_consumer_kwargs)
    )
    # Same cluster, different consumer settings.
    assert (
        KafkaConsumerConfig("some_group", base_cluster, **base_consumer_kwargs)
        != KafkaConsumerConfig("some_group", base_cluster, **other_consumer_kwargs)
    )
    # Different group IDs.
    assert (
        KafkaConsumerConfig("some_group1", base_cluster, **base_consumer_kwargs)
        != KafkaConsumerConfig("some_group2", base_cluster, **base_consumer_kwargs)
    )
def test_simple_consumer():
    """End-to-end: produce 100 messages to a fresh topic, then read them
    back in order with KafkaSimpleConsumer."""
    topic = create_random_topic(1, 1)
    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)

    payloads = [str(i).encode("UTF-8") for i in range(100)]
    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    producer.send_messages(topic, *payloads)

    config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        auto_commit=False,
        consumer_timeout_ms=1000,
    )
    consumer = KafkaSimpleConsumer(topic, config)
    with consumer:
        # Single partition: messages come back with sequential offsets.
        for expected_offset in range(100):
            message = consumer.get_message()
            assert message.offset == expected_offset
            assert message.partition == 0
            assert message.value == str(expected_offset).encode("UTF-8")
def parse_as_cluster_config(config_obj):
    """Convert a discovery response object into a ClusterConfig.

    Copies over the object's type, name, broker list, and zookeeper string.
    """
    return ClusterConfig(
        type=config_obj.type,
        name=config_obj.name,
        broker_list=config_obj.broker_list,
        zookeeper=config_obj.zookeeper,
    )
def test___eq___broker_str(self):
    """Equality for comma-separated broker strings, regardless of host order."""
    reference = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list='kafka-cluster-1:9092,kafka-cluster-2:9092',
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    identical = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list='kafka-cluster-1:9092,kafka-cluster-2:9092',
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    assert reference == identical

    # Same hosts listed in a different order still compare equal.
    reordered = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list='kafka-cluster-2:9092,kafka-cluster-1:9092',
        zookeeper='zookeeper-cluster-2:2181,zookeeper-cluster-1:2181,',
    )
    assert reference == reordered
def test___eq___broker_list(self):
    """Configs with the same brokers are equal, even if the list is reordered."""
    reference = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list=['kafka-cluster-1:9092', 'kafka-cluster-2:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    identical = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list=['kafka-cluster-1:9092', 'kafka-cluster-2:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    assert reference == identical

    # Same brokers, different list order.
    reordered = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list=['kafka-cluster-2:9092', 'kafka-cluster-1:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    assert reference == reordered
def run_kafka_consumer_group_test(num_consumers, num_partitions):
    """Spin up ``num_consumers`` consumer processes in one group and verify
    that, between them, they consume exactly the 100 produced messages."""
    topic = create_random_topic(1, num_partitions)
    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
    config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        partitioner_cooldown=5,
        auto_commit_interval_messages=1,
    )
    queue = Queue()

    def create_consumer():
        # Each consumer runs in its own daemon process and drains messages
        # into the shared queue until the consumer times out.
        def consume():
            consumer = KafkaConsumerGroup([topic], config)
            with consumer:
                while True:
                    try:
                        message = consumer.next()
                        queue.put(message)
                        consumer.task_done(message)
                    except ConsumerTimeout:
                        return

        proc = Process(target=consume)
        proc.daemon = True
        return proc

    consumer_processes = [create_consumer() for _ in range(num_consumers)]
    for consumer_process in consumer_processes:
        consumer_process.start()

    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    for i in range(100):
        producer.send_messages(topic, str(i).encode("UTF-8"))

    # Block until the consumers have pushed all 100 messages onto the queue.
    while queue.qsize() < 100:
        time.sleep(0.1)

    received_messages = []
    while True:
        try:
            message = queue.get(block=True, timeout=0.5)
        except Empty:
            break
        received_messages.append(int(message.value))

    # Every message 0..99 was consumed exactly once across all consumers.
    assert list(range(100)) == sorted(received_messages)
def test___eq__(self):
    """KafkaConsumerConfig equality holds under re-ordered consumer dicts
    and re-ordered broker lists."""
    consumer_kwargs = {
        'buffer_size': 1024,
        'auto_commit_every_n': 100,
        'auto_commit_every_t': 20,
        'auto_commit': True,
        'fetch_size_bytes': 4096,
        'max_buffer_size': None,
        'iter_timeout': 120,
    }
    cluster_config = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka-cluster-1:9092', 'kafka-cluster-2:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    # Identical settings with dict keys and brokers listed in a different order.
    consumer_kwargs_reordered = {
        'fetch_size_bytes': 4096,
        'auto_commit_every_t': 20,
        'auto_commit': True,
        'max_buffer_size': None,
        'buffer_size': 1024,
        'iter_timeout': 120,
        'auto_commit_every_n': 100,
    }
    cluster_config_reordered = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka-cluster-2:9092', 'kafka-cluster-1:9092'],
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )

    reference = KafkaConsumerConfig("some_group", cluster_config, **consumer_kwargs)
    assert reference == KafkaConsumerConfig(
        "some_group", cluster_config, **consumer_kwargs)
    # Re-ordered inputs still produce an equal config.
    assert reference == KafkaConsumerConfig(
        "some_group", cluster_config_reordered, **consumer_kwargs_reordered)
def test___ne___broker_str(self):
    """Inequality when broker or zookeeper host strings name different hosts."""
    reference = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list='kafka-cluster-1:9092,kafka-cluster-2:9092',
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    # Broker string names a different host pair.
    different_brokers = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list='kafka-cluster-2:9092,kafka-cluster-3:9092',
        zookeeper='zookeeper-cluster-1:2181,zookeeper-cluster-2:2181,',
    )
    assert reference != different_brokers

    # Zookeeper string names a different host pair.
    different_zookeepers = ClusterConfig(
        type='some_type',
        name='some_cluster',
        broker_list='kafka-cluster-1:9092,kafka-cluster-2:9092',
        zookeeper='zookeeper-cluster-2:2181,zookeeper-cluster-3:2181,',
    )
    assert reference != different_zookeepers
def test_get_cluster_by_name(self, mock_yaml):
    """Lookup by name returns the matching cluster; unknown names raise
    ConfigurationError."""
    topology = TopologyConfiguration(
        cluster_type='mykafka',
        kafka_topology_path=TEST_BASE_KAFKA,
    )
    expected_cluster = ClusterConfig(
        'mykafka', 'cluster1', ["mybrokerhost1:9092"], "0.1.2.3,0.2.3.4/kafka")
    assert expected_cluster == topology.get_cluster_by_name('cluster1')

    with pytest.raises(ConfigurationError):
        topology.get_cluster_by_name('does-not-exist')
def test_discover_topics(mock_kafka, mock_topics):
    """discover_topics decodes byte topic names into str keys."""
    topics = {
        'topic1'.encode(): [0, 1, 2, 3],
        'topic2'.encode(): [0],
    }
    mock_topics.return_value = topics
    cluster = ClusterConfig(
        'type1',
        'mycluster',
        ['mybroker'],
        'zkhosts/kakfa',
    )
    expected = {name.decode(): partitions for name, partitions in six.iteritems(topics)}
    assert discovery.discover_topics(cluster) == expected
def test_get_local_cluster(self, mock_yaml):
    """get_local_cluster loads the topology yaml once and returns the
    local cluster's config."""
    topology = TopologyConfiguration(
        cluster_type='mykafka',
        kafka_topology_path=TEST_BASE_KAFKA,
    )
    # The topology file path is derived from the cluster type.
    mock_yaml.assert_called_once_with('/base/kafka_discovery/mykafka.yaml')
    expected_cluster = ClusterConfig(
        'mykafka',
        'cluster1',
        ['mybrokerhost1:9092'],
        '0.1.2.3,0.2.3.4/kafka',
    )
    assert topology.get_local_cluster() == expected_cluster
def test_get_kafka_connection_kwargs(mock_get_cluster):
    """Extra kwargs (e.g. timeout) are forwarded to the KafkaClient constructor."""
    my_cluster = ClusterConfig(
        'type1',
        'cluster1',
        ['mybroker'],
        'zk_hosts/kafka',
    )
    mock_get_cluster.return_value = my_cluster
    with mock.patch(
        "yelp_kafka.discovery.KafkaClient", autospec=True
    ) as mock_kafka:
        mock_kafka.return_value = mock.sentinel.kafkaclient
        actual = discovery.get_kafka_connection("mycluster", "yelp-kafka", timeout=10)
        mock_kafka.assert_called_once_with(
            ['mybroker'],
            client_id='yelp-kafka',
            timeout=10,
        )
        assert actual == mock.sentinel.kafkaclient
def test_get_simple_consumer_args(self):
    """get_simple_consumer_args maps consumer options onto SimpleConsumer kwargs."""
    cluster_config = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka:9092'],
        zookeeper='zookeeper:2181',
    )
    config = KafkaConsumerConfig(
        'some_group',
        cluster_config,
        auto_offset_reset='smallest',
        fetch_min_bytes=456,
        consumer_timeout_ms=5000,
    )
    args = config.get_simple_consumer_args()
    assert args['buffer_size'] == MAX_MESSAGE_SIZE_BYTES
    assert args['auto_commit']
    assert args['auto_offset_reset'] == 'smallest'
    # fetch_min_bytes surfaces as fetch_size_bytes; consumer_timeout_ms (ms)
    # surfaces as iter_timeout (s).
    assert args['fetch_size_bytes'] == 456
    assert args['iter_timeout'] == 5
def test_get_kafka_connection_error(mock_get_cluster):
    """A KafkaClient construction failure is surfaced as DiscoveryError."""
    my_cluster = ClusterConfig(
        'type1',
        'cluster1',
        ['mybroker'],
        'zk_hosts/kafka',
    )
    mock_get_cluster.return_value = my_cluster
    with mock.patch(
        "yelp_kafka.discovery.KafkaClient", autospec=True
    ) as mock_kafka:
        mock_kafka.side_effect = Exception("Boom!")
        with pytest.raises(DiscoveryError):
            discovery.get_kafka_connection("mycluster", "yelp-kafka")
        mock_kafka.assert_called_once_with(
            ['mybroker'],
            client_id='yelp-kafka'
        )
def test_get_consumer_config(mock_get_cluster):
    """get_consumer_config builds a KafkaConsumerConfig for the resolved
    cluster, passing through the caller's kwargs."""
    my_cluster = ClusterConfig(
        'type1',
        'cluster1',
        ['mybroker'],
        'zk_hosts/kafka',
    )
    mock_get_cluster.return_value = my_cluster
    with mock.patch(
        "yelp_kafka.discovery.KafkaConsumerConfig", autospec=True
    ) as mock_config:
        mock_config.return_value = mock.sentinel.kafka_config
        actual = discovery.get_consumer_config(
            "mycluster", group_id='mygroup', auto_offset_reset='largest')
        mock_config.assert_called_once_with(
            cluster=my_cluster,
            group_id='mygroup',
            auto_offset_reset='largest'
        )
        assert actual == mock.sentinel.kafka_config
def test_get_kafka_consumer_config(self):
    """get_kafka_consumer_config translates options into kafka-python
    consumer option names, filling in defaults where unset."""
    cluster_config = ClusterConfig(
        type='mykafka',
        name='some_cluster',
        broker_list=['kafka:9092'],
        zookeeper='zookeeper:2181',
    )
    config = KafkaConsumerConfig(
        'some_group',
        cluster_config,
        fetch_message_max_bytes=123,
        auto_commit=False,
        iter_timeout=5,
    )
    kafka_config = config.get_kafka_consumer_config()
    assert kafka_config['fetch_message_max_bytes'] == 123
    assert kafka_config['auto_commit_enable'] is False
    # Defaults: commit interval comes from the secs constant (converted to ms),
    # socket timeout from the default consumer config.
    assert kafka_config['auto_commit_interval_ms'] == AUTO_COMMIT_INTERVAL_SECS * 1000
    assert kafka_config['socket_timeout_ms'] == DEFAULT_CONSUMER_CONFIG['socket_timeout_ms']
    # iter_timeout is given in seconds; consumer_timeout_ms is milliseconds.
    assert kafka_config['consumer_timeout_ms'] == 5000
def cluster():
    """Fixture: a minimal single-broker ClusterConfig."""
    return ClusterConfig(
        'cluster_type',
        'mycluster',
        ['test_broker:9292'],
        'test_cluster',
    )
def test_discover_topics_error(mock_kafka, mock_topics):
    """Errors while fetching topic metadata are wrapped in DiscoveryError."""
    mock_topics.side_effect = Exception("Boom!")
    failing_cluster = ClusterConfig('type1', 'mycluster', ['mybroker'], 'zkhosts')
    with pytest.raises(DiscoveryError):
        discovery.discover_topics(failing_cluster)
def mock_clusters():
    """Fixture: two sample ClusterConfigs of the same cluster type."""
    first = ClusterConfig('type1', 'cluster1', ['mybroker'], 'zk_hosts/kafka')
    second = ClusterConfig('type1', 'cluster2', ['mybroker2'], 'zk_hosts2/kafa')
    return [first, second]