def test_huge_messages(self):
    """Verify a message larger than the default fetch buffer can be consumed.

    Sends one message whose payload is built from
    ``random_string(MAX_FETCH_BUFFER_SIZE_BYTES + 10)``, checks that a
    consumer created with default arguments raises
    ``ConsumerFetchSizeTooSmall`` on ``get_message``, and then checks that a
    consumer created with ``max_buffer_size=None`` reads the same message.
    """
    oversized_payload = random_string(MAX_FETCH_BUFFER_SIZE_BYTES + 10)
    sent = self.send_messages(0, [create_message(oversized_payload)])
    # Exactly one message was sent, so exactly one is expected back.
    (huge_message,) = sent

    # Create a consumer with the default buffer size
    default_consumer = self.consumer()

    # This consumer fails to get the message
    with self.assertRaises(ConsumerFetchSizeTooSmall):
        default_consumer.get_message(False, 0.1)

    default_consumer.stop()

    # Create a consumer with no fetch size limit
    unbounded_options = {'max_buffer_size': None, 'partitions': [0]}
    big_consumer = self.consumer(**unbounded_options)

    # Seek to the last message
    big_consumer.seek(-1, 2)

    # Consume giant message successfully
    received = big_consumer.get_message(block=False, timeout=10)
    self.assertIsNotNone(received)
    self.assertEqual(received.message.value, huge_message)

    big_consumer.stop()
def test_heartbeat_thread(kafka_broker, topic):
    """Check that heartbeats are sent without the test calling ``poll()``.

    After the consumer has an assignment, the test waits (up to 30 seconds)
    for ``heartbeat.last_send`` to advance while it makes no ``poll()`` call,
    and asserts that ``heartbeat.last_poll`` did not change in the meantime.
    A subsequent ``poll()`` must then advance ``last_poll``.

    Raises:
        RuntimeError: if no new heartbeat is observed within 30 seconds.
    """
    group_id = 'test-group-' + random_string(6)
    consumer = KafkaConsumer(topic,
                             bootstrap_servers=get_connect_str(kafka_broker),
                             group_id=group_id,
                             heartbeat_interval_ms=500)
    # Close the consumer even when an assertion or the timeout below fires,
    # so a failing run does not leave it open.
    try:
        # poll until we have joined group / have assignment
        while not consumer.assignment():
            consumer.poll(timeout_ms=100)

        assert consumer._coordinator.state is MemberState.STABLE
        last_poll = consumer._coordinator.heartbeat.last_poll
        last_beat = consumer._coordinator.heartbeat.last_send

        timeout = time.time() + 30
        while True:
            if time.time() > timeout:
                raise RuntimeError('timeout waiting for heartbeat')
            if consumer._coordinator.heartbeat.last_send > last_beat:
                break
            time.sleep(0.5)

        # No poll() was issued while waiting, so last_poll must be unchanged.
        assert consumer._coordinator.heartbeat.last_poll == last_poll
        consumer.poll(timeout_ms=100)
        assert consumer._coordinator.heartbeat.last_poll > last_poll
    finally:
        consumer.close()
def test_lz4_incremental():
    """Round-trip a large payload through ``lz4_encode``/``lz4_decode``.

    Repeated 1000 times with a fresh random payload each iteration.
    """
    for _ in range(1000):
        # lz4 max single block size is 4MB
        # make sure we test with multiple-blocks
        chunk = random_string(100).encode('utf-8')
        original = chunk * 50000
        restored = lz4_decode(lz4_encode(original))
        assert len(original) == len(restored)
        assert original == restored
def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    """Check the record metadata returned by ``KafkaProducer.send``.

    A record with an explicit ``timestamp_ms`` is sent to partition 0 of a
    randomly named topic and its metadata (topic, partition, offset,
    timestamp, checksum, serialized sizes) is asserted; the expected
    timestamp and checksum depend on the producer's usable message magic.
    Unless magic is 0, a second record is sent with ``timestamp_ms=None`` and
    its timestamp must be within one second of the local clock.
    """
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression)
    # The producer must be closed on every exit path (failed assertion,
    # early return for magic 0, or normal completion).
    try:
        magic = producer._max_usable_produce_magic()

        # record headers are supported in 0.11.0
        if version() < (0, 11, 0):
            headers = None
        else:
            headers = [("Header Key", b"Header Value")]

        topic = random_string(5)
        future = producer.send(topic,
                               value=b"Simple value",
                               key=b"Simple key",
                               headers=headers,
                               timestamp_ms=9999999,
                               partition=0)
        record = future.get(timeout=5)
        assert record is not None
        assert record.topic == topic
        assert record.partition == 0
        assert record.topic_partition == TopicPartition(topic, 0)
        assert record.offset == 0
        if magic >= 1:
            assert record.timestamp == 9999999
        else:
            assert record.timestamp == -1  # NO_TIMESTAMP

        if magic >= 2:
            assert record.checksum is None
        elif magic == 1:
            assert record.checksum == 1370034956
        else:
            assert record.checksum == 3296137851

        assert record.serialized_key_size == 10
        assert record.serialized_value_size == 12
        if headers:
            assert record.serialized_header_size == 22

        # generated timestamp case is skipped for broker 0.9 and below
        if magic == 0:
            return

        send_time = time.time() * 1000
        future = producer.send(topic,
                               value=b"Simple value",
                               key=b"Simple key",
                               timestamp_ms=None,
                               partition=0)
        record = future.get(timeout=5)
        assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
    finally:
        producer.close()
def setUpClass(cls):
    """Start a Zookeeper fixture and two Kafka fixtures sharing one chroot.

    Does nothing when the ``KAFKA_VERSION`` environment variable is unset or
    empty.  ``cls.server`` is an alias for ``cls.server1``.
    """
    if os.environ.get('KAFKA_VERSION'):
        cls.zk = ZookeeperFixture.instance()
        shared_chroot = random_string(10)
        cls.server1 = KafkaFixture.instance(0, cls.zk, zk_chroot=shared_chroot)
        cls.server2 = KafkaFixture.instance(1, cls.zk, zk_chroot=shared_chroot)

        # Bootstrapping server
        cls.server = cls.server1
def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    """Assert the metadata that ``KafkaProducer.send`` reports for a record.

    First sends a record with a fixed ``timestamp_ms`` to partition 0 of a
    randomly named topic and checks topic, partition, offset, timestamp,
    checksum and serialized sizes; expected timestamp and checksum vary with
    the producer's usable message magic.  When magic is not 0, sends a second
    record with ``timestamp_ms=None`` and checks its timestamp is within one
    second of the local clock.
    """
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression)
    # Guarantee the producer is closed whether the test passes, fails, or
    # returns early.
    try:
        magic = producer._max_usable_produce_magic()

        # record headers are supported in 0.11.0
        if version() < (0, 11, 0):
            headers = None
        else:
            headers = [("Header Key", b"Header Value")]

        topic = random_string(5)
        future = producer.send(
            topic,
            value=b"Simple value", key=b"Simple key", headers=headers,
            timestamp_ms=9999999, partition=0)
        record = future.get(timeout=5)
        assert record is not None
        assert record.topic == topic
        assert record.partition == 0
        assert record.topic_partition == TopicPartition(topic, 0)
        assert record.offset == 0
        if magic >= 1:
            assert record.timestamp == 9999999
        else:
            assert record.timestamp == -1  # NO_TIMESTAMP

        if magic >= 2:
            assert record.checksum is None
        elif magic == 1:
            assert record.checksum == 1370034956
        else:
            assert record.checksum == 3296137851

        assert record.serialized_key_size == 10
        assert record.serialized_value_size == 12
        if headers:
            assert record.serialized_header_size == 22

        # generated timestamp case is skipped for broker 0.9 and below
        if magic == 0:
            return

        send_time = time.time() * 1000
        future = producer.send(
            topic,
            value=b"Simple value", key=b"Simple key", timestamp_ms=None,
            partition=0)
        record = future.get(timeout=5)
        assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
    finally:
        producer.close()
def _send_random_messages(self, producer, topic, partition, n):
    """Send ``n`` random messages via ``producer.send_messages``.

    Each message is retried until a send completes without raising; every
    failure is logged with its traceback.  There is no retry limit.
    """
    for index in range(n):
        text = 'msg {0}: {1}'.format(index, random_string(10))
        log.debug('_send_random_message %s to %s:%d', text, topic, partition)
        delivered = False
        while not delivered:
            try:
                producer.send_messages(topic, partition, text.encode('utf-8'))
            except Exception:
                log.exception('failure in _send_random_messages - retrying')
            else:
                delivered = True
def _send_random_messages(self, producer, topic, partition, n):
    """Publish ``n`` random messages, retrying each one until it is sent.

    Any exception raised while sending is logged and the same message is
    attempted again; the loop only moves on after a successful send.
    """
    for seq in range(n):
        body = 'msg {0}: {1}'.format(seq, random_string(10))
        log.debug('_send_random_message %s to %s:%d', body, topic, partition)
        while True:
            try:
                producer.send_messages(topic, partition, body.encode('utf-8'))
                # Reached only when the send did not raise.
                break
            except Exception:
                log.exception('failure in _send_random_messages - retrying')
def setUp(self):
    """Prepare a topic and clients for an integration test.

    Skips the test when ``KAFKA_VERSION`` is not set.  When ``self.topic`` is
    empty, a name is generated from the test id plus a random suffix.  When
    ``self.create_client`` is true, a ``SimpleClient`` and a ``KafkaClient``
    are created against ``self.server``.  The method then waits up to 30
    seconds for topic metadata and for every partition to answer an offset
    request.

    Raises:
        KafkaTimeoutError: if metadata or partitions are not available
            within the 30 second window.
    """
    super(KafkaIntegrationTestCase, self).setUp()
    if not os.environ.get('KAFKA_VERSION'):
        self.skipTest('Integration test requires KAFKA_VERSION')

    if not self.topic:
        test_name = self.id()[self.id().rindex(".") + 1:]
        self.topic = "%s-%s" % (test_name, random_string(10))

    if self.create_client:
        self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))
        self.client_async = KafkaClient(
            bootstrap_servers='%s:%d' % (self.server.host, self.server.port))

    timeout = time.time() + 30
    while time.time() < timeout:
        try:
            self.client.load_metadata_for_topics(
                self.topic, ignore_leadernotavailable=False)
            # Use self.topic here: a local topic name only exists when the
            # name was generated above, so referencing it would raise
            # NameError for test cases that preset self.topic.
            if self.client.has_metadata_for_topic(self.topic):
                break
        except (LeaderNotAvailableError, InvalidTopicError):
            time.sleep(1)
    else:
        # Loop ran out of time without ever reaching the break above.
        raise KafkaTimeoutError('Timeout loading topic metadata!')

    # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
    # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
    for partition in self.client.get_partition_ids_for_topic(self.topic):
        while True:
            try:
                req = OffsetRequestPayload(self.topic, partition, -1, 100)
                self.client.send_offset_request([req])
                break
            except (NotLeaderForPartitionError,
                    UnknownTopicOrPartitionError,
                    FailedPayloadsError):
                # Shares the deadline computed for the metadata wait.
                if time.time() > timeout:
                    raise KafkaTimeoutError(
                        'Timeout loading topic metadata!')
                time.sleep(.1)

    self._messages = {}
def setUp(self):
    """Start a Zookeeper fixture and three Kafka fixtures for failover tests.

    Skips the test when ``KAFKA_VERSION`` is not set.  A ``SimpleClient`` is
    created against all broker addresses before the parent ``setUp`` runs.
    """
    if not os.environ.get('KAFKA_VERSION'):
        self.skipTest('integration test requires KAFKA_VERSION')

    zk_chroot = random_string(10)
    replicas = 3
    partitions = 3

    # mini zookeeper, 3 kafka brokers
    self.zk = ZookeeperFixture.instance()
    started = []
    for broker_id in range(replicas):
        fixture = KafkaFixture.instance(broker_id, self.zk,
                                        zk_chroot=zk_chroot,
                                        replicas=replicas,
                                        partitions=partitions)
        started.append(fixture)
    self.brokers = started

    hosts = []
    for broker in self.brokers:
        hosts.append('%s:%d' % (broker.host, broker.port))
    self.client = SimpleClient(hosts, timeout=2)
    super(TestFailover, self).setUp()
def test_end_to_end(kafka_broker, compression):
    """Produce 100 messages and consume them back from a random topic.

    Returns without testing for ``lz4`` compression when ``version()`` is
    below 0.8.2 or when running on PyPy.
    """
    if compression == 'lz4':
        # LZ4 requires 0.8.2
        too_old = version() < (0, 8, 2)
        # python-lz4 crashes on older versions of pypy
        if too_old or platform.python_implementation() == 'PyPy':
            return

    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=30000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)

    topic = random_string(5)

    messages = 100
    futures = [producer.send(topic, 'msg %d' % i) for i in range(messages)]
    ret = [f.get(timeout=30) for f in futures]
    assert len(ret) == messages
    producer.close()

    consumer.subscribe([topic])
    msgs = set()
    for _ in range(messages):
        try:
            msgs.add(next(consumer).value)
        except StopIteration:
            # Consumer stopped iterating before all messages arrived.
            break

    expected = set('msg %d' % (i,) for i in range(messages))
    assert msgs == expected
    consumer.close()
def test_end_to_end(kafka_broker, compression):
    """Send 100 string messages through a producer and read them back.

    For ``lz4`` compression the test returns early when ``version()`` is
    below 0.8.2 or when the interpreter is PyPy.
    """
    if compression == 'lz4':
        # LZ4 requires 0.8.2
        if version() < (0, 8, 2):
            return
        # python-lz4 crashes on older versions of pypy
        if platform.python_implementation() == 'PyPy':
            return

    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=30000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)

    topic = random_string(5)

    messages = 100
    pending = []
    for number in range(messages):
        pending.append(producer.send(topic, 'msg %d' % number))
    acknowledged = []
    for future in pending:
        acknowledged.append(future.get(timeout=30))
    assert len(acknowledged) == messages
    producer.close()

    consumer.subscribe([topic])
    received = set()
    remaining = messages
    while remaining > 0:
        remaining -= 1
        try:
            received.add(next(consumer).value)
        except StopIteration:
            break

    wanted = set(['msg %d' % (number, ) for number in range(messages)])
    assert received == wanted
    consumer.close()
def test_large_messages(self):
    """Consume a mix of small and 5000-character messages in one pass.

    Ten small and ten large messages are sent, then a consumer created with
    ``max_buffer_size=60000`` must yield exactly that set of values.
    """
    # Produce 10 "normal" size messages
    small_payloads = [str(number) for number in range(10)]
    small_messages = self.send_messages(0, small_payloads)

    # Produce 10 messages that are large (bigger than default fetch size)
    large_payloads = [random_string(5000) for _ in range(10)]
    large_messages = self.send_messages(0, large_payloads)

    # Brokers prior to 0.11 will return the next message
    # if it is smaller than max_bytes (called buffer_size in SimpleConsumer)
    # Brokers 0.11 and later that store messages in v2 format
    # internally will return the next message only if the
    # full MessageSet is smaller than max_bytes.
    # For that reason, we set the max buffer size to a little more
    # than the size of all large messages combined
    consumer = self.consumer(max_buffer_size=60000)

    expected_messages = set(small_messages + large_messages)
    actual_messages = set()
    for fetched in consumer:
        actual_messages.add(fetched.message.value)
    self.assertEqual(expected_messages, actual_messages)

    consumer.stop()
def test_kafka_consumer__offset_commit_resume(self):
    """Check a second consumer in the same group resumes after the first.

    200 messages are sent in two calls of 100.  The first consumer reads 180
    and is closed; a second consumer with the same group id reads 20 more,
    and the union of both reads must contain 200 distinct messages.
    """
    GROUP_ID = random_string(10)

    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    # Both consumers are created with identical settings.
    consumer_options = dict(
        group_id=GROUP_ID,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        auto_offset_reset='earliest',
    )

    # Start a consumer
    consumer1 = self.kafka_consumer(**consumer_options)

    # Grab the first 180 messages
    output_msgs1 = [next(consumer1) for _ in range(180)]
    self.assert_message_count(output_msgs1, 180)
    consumer1.close()

    # The total offset across both partitions should be at 180
    consumer2 = self.kafka_consumer(**consumer_options)

    # 181-200
    output_msgs2 = [next(consumer2) for _ in range(20)]
    self.assert_message_count(output_msgs2, 20)

    combined = set(output_msgs1).union(output_msgs2)
    self.assertEqual(len(combined), 200)
    consumer2.close()
def setUp(self):
    """Prepare a topic and a client for an integration test.

    Skips the test when ``KAFKA_VERSION`` is not set.  When ``self.topic`` is
    empty, a name is generated from the test id plus a random suffix.  When
    ``self.create_client`` is true, a ``SimpleClient`` is created against
    ``self.server``.  The method then waits up to 30 seconds for topic
    metadata and for every partition to answer an offset request.

    Raises:
        KafkaTimeoutError: if metadata or partitions are not available
            within the 30 second window.
    """
    super(KafkaIntegrationTestCase, self).setUp()
    if not os.environ.get('KAFKA_VERSION'):
        self.skipTest('Integration test requires KAFKA_VERSION')

    if not self.topic:
        test_name = self.id()[self.id().rindex(".") + 1:]
        self.topic = "%s-%s" % (test_name, random_string(10))

    if self.create_client:
        self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

    timeout = time.time() + 30
    while time.time() < timeout:
        try:
            self.client.load_metadata_for_topics(self.topic, ignore_leadernotavailable=False)
            # Query self.topic rather than a local variable: the local was
            # only bound when the topic name was generated in this method,
            # which made preset topics fail with NameError.
            if self.client.has_metadata_for_topic(self.topic):
                break
        except (LeaderNotAvailableError, InvalidTopicError):
            time.sleep(1)
    else:
        # The while condition expired without hitting the break.
        raise KafkaTimeoutError('Timeout loading topic metadata!')

    # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
    # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
    for partition in self.client.get_partition_ids_for_topic(self.topic):
        while True:
            try:
                req = OffsetRequestPayload(self.topic, partition, -1, 100)
                self.client.send_offset_request([req])
                break
            except (NotLeaderForPartitionError,
                    UnknownTopicOrPartitionError,
                    FailedPayloadsError):
                # Reuses the deadline from the metadata wait above.
                if time.time() > timeout:
                    raise KafkaTimeoutError('Timeout loading topic metadata!')
                time.sleep(.1)

    self._messages = {}
def setUp(self):
    """Bring up one Zookeeper fixture and three Kafka fixtures.

    The test is skipped when ``KAFKA_VERSION`` is not set.  All fixtures
    share a random zookeeper chroot, and ``self.client`` is connected to
    every broker before the parent ``setUp`` is invoked.
    """
    kafka_version = os.environ.get('KAFKA_VERSION')
    if not kafka_version:
        self.skipTest('integration test requires KAFKA_VERSION')

    zk_chroot = random_string(10)
    replicas = 3
    partitions = 3

    # mini zookeeper, 3 kafka brokers
    self.zk = ZookeeperFixture.instance()
    kk_kwargs = dict(zk_chroot=zk_chroot,
                     replicas=replicas,
                     partitions=partitions)
    self.brokers = [KafkaFixture.instance(index, self.zk, **kk_kwargs)
                    for index in range(replicas)]

    hosts = ['%s:%d' % (broker.host, broker.port) for broker in self.brokers]
    self.client = SimpleClient(hosts, timeout=2)
    super(TestFailover, self).setUp()
def test_kafka_consumer_max_bytes_one_msg(self):
    """Check a consumer with ``fetch_max_bytes=1`` still yields messages."""
    # We send to only 1 partition so we don't have parallel requests to 2
    # nodes for data.
    self.send_messages(0, range(100, 200))

    # Start a consumer. FetchResponse_v3 should always include at least 1
    # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time
    # But 0.11.0.0 returns 1 MessageSet at a time when the messages are
    # stored in the new v2 format by the broker.
    #
    # DP Note: This is a strange test. The consumer shouldn't care
    # how many messages are included in a FetchResponse, as long as it is
    # non-zero. I would not mind if we deleted this test. It caused
    # a minor headache when testing 0.11.0.0.
    group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
    consumer = self.kafka_consumer(group_id=group,
                                   auto_offset_reset='earliest',
                                   consumer_timeout_ms=5000,
                                   fetch_max_bytes=1)

    fetched_msgs = []
    for _ in range(10):
        fetched_msgs.append(next(consumer))
    self.assertEqual(len(fetched_msgs), 10)
    consumer.close()
def test_kafka_consumer_max_bytes_one_msg(self):
    """Read ten messages through a consumer limited to ``fetch_max_bytes=1``."""
    # We send to only 1 partition so we don't have parallel requests to 2
    # nodes for data.
    self.send_messages(0, range(100, 200))

    # Start a consumer. FetchResponse_v3 should always include at least 1
    # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time
    # But 0.11.0.0 returns 1 MessageSet at a time when the messages are
    # stored in the new v2 format by the broker.
    #
    # DP Note: This is a strange test. The consumer shouldn't care
    # how many messages are included in a FetchResponse, as long as it is
    # non-zero. I would not mind if we deleted this test. It caused
    # a minor headache when testing 0.11.0.0.
    group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
    consumer_options = {
        'group_id': group,
        'auto_offset_reset': 'earliest',
        'consumer_timeout_ms': 5000,
        'fetch_max_bytes': 1,
    }
    consumer = self.kafka_consumer(**consumer_options)

    wanted = 10
    fetched_msgs = [next(consumer) for _ in range(wanted)]
    self.assertEqual(len(fetched_msgs), 10)
    consumer.close()
def test_switch_leader_keyed_producer(self):
    """Check a ``KeyedProducer`` keeps working after a leader is killed.

    Ten keyed messages are sent, the leader for partition 0 is killed, and
    sends are retried for up to 60 seconds until a message whose key maps to
    partition 0 goes through.  Ten more messages are then sent to confirm no
    further exceptions occur.
    """
    topic = self.topic
    producer = KeyedProducer(self.client, async_send=False)

    def send_random_keyed_message():
        # Sends one random key/value pair and returns the key that was used.
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)
        return key

    # Send 10 random messages
    for _ in range(10):
        send_random_keyed_message()

    # kill leader for partition 0
    self._kill_leader(topic, 0)

    recovered = False
    started = time.time()
    timeout = 60
    retryable = (FailedPayloadsError, KafkaConnectionError,
                 RequestTimedOutError, NotLeaderForPartitionError)
    while not recovered and (time.time() - started) < timeout:
        try:
            key = send_random_keyed_message()
            # Only a successful send to partition 0 counts as recovery.
            recovered = producer.partitioners[topic].partition(key) == 0
        except retryable:
            log.debug("caught exception sending message -- will retry")

    # Verify we successfully sent the message
    self.assertTrue(recovered)

    # send some more messages just to make sure no more exceptions
    for _ in range(10):
        send_random_keyed_message()
def test_group(kafka_broker, topic):
    """Check that four consumers in one group split the topic's partitions.

    Four threads each create a ``KafkaConsumer`` in the same group and poll
    until told to stop.  The main thread waits up to 35 seconds for every
    consumer to hold an assignment, share one generation id and not be
    rejoining, then asserts the assignments are non-empty, disjoint and
    together cover partitions ``0..num_partitions-1`` of ``topic``.
    """
    num_partitions = 4
    connect_str = get_connect_str(kafka_broker)
    consumers = {}
    stop = {}
    threads = {}
    # messages[consumer_index][topic_partition] -> list of records.  It must
    # be two levels deep because consumer_thread indexes it by both keys.
    messages = collections.defaultdict(lambda: collections.defaultdict(list))
    group_id = 'test-group-' + random_string(6)

    def consumer_thread(i):
        # Runs one consumer until stop[i] is set, collecting fetched records.
        assert i not in consumers
        assert i not in stop
        stop[i] = threading.Event()
        consumers[i] = KafkaConsumer(topic,
                                     bootstrap_servers=connect_str,
                                     group_id=group_id,
                                     heartbeat_interval_ms=500)
        while not stop[i].is_set():
            # Iterate (partition, records) pairs; iterating only the values
            # cannot be unpacked into two names.
            for tp, records in six.iteritems(consumers[i].poll(100)):
                messages[i][tp].extend(records)
        consumers[i].close()
        consumers[i] = None
        stop[i] = None

    num_consumers = 4
    for i in range(num_consumers):
        t = threading.Thread(target=consumer_thread, args=(i, ))
        t.start()
        threads[i] = t

    try:
        timeout = time.time() + 35
        while True:
            for c in range(num_consumers):

                # Verify all consumers have been created
                if c not in consumers:
                    break

                # Verify all consumers have an assignment
                elif not consumers[c].assignment():
                    break

            # If all consumers exist and have an assignment
            else:

                logging.info(
                    'All consumers have assignment... checking for stable group'
                )
                # Verify all consumers are in the same generation
                # then log state and break while loop
                generations = set([
                    consumer._coordinator._generation.generation_id
                    for consumer in list(consumers.values())
                ])

                # New generation assignment is not complete until
                # coordinator.rejoining = False
                rejoining = any([
                    consumer._coordinator.rejoining
                    for consumer in list(consumers.values())
                ])

                if not rejoining and len(generations) == 1:
                    for c, consumer in list(consumers.items()):
                        logging.info(
                            "[%s] %s %s: %s", c,
                            consumer._coordinator._generation.generation_id,
                            consumer._coordinator._generation.member_id,
                            consumer.assignment())
                    break
                else:
                    logging.info('Rejoining: %s, generations: %s', rejoining,
                                 generations)
                    time.sleep(1)
            assert time.time() < timeout, "timeout waiting for assignments"

        logging.info('Group stabilized; verifying assignment')
        group_assignment = set()
        for c in range(num_consumers):
            assert len(consumers[c].assignment()) != 0
            assert set.isdisjoint(consumers[c].assignment(), group_assignment)
            group_assignment.update(consumers[c].assignment())

        assert group_assignment == set([
            TopicPartition(topic, partition)
            for partition in range(num_partitions)
        ])
        logging.info('Assignment looks good!')

    finally:
        logging.info('Shutting down %s consumers', num_consumers)
        for c in range(num_consumers):
            logging.info('Stopping consumer %s', c)
            stop[c].set()
            threads[c].join()
            threads[c] = None
def test_gzip():
    """Round-trip 1000 random payloads through ``gzip_encode``/``gzip_decode``."""
    for _ in range(1000):
        original = random_string(100).encode('utf-8')
        compressed = gzip_encode(original)
        restored = gzip_decode(compressed)
        assert original == restored
def test_snappy():
    """Round-trip 1000 random payloads through ``snappy_encode``/``snappy_decode``."""
    for _ in range(1000):
        original = random_string(100).encode('utf-8')
        compressed = snappy_encode(original)
        restored = snappy_decode(compressed)
        assert original == restored
def test_lz4_old():
    """Round-trip 1000 random payloads through the ``*_old_kafka`` lz4 codec."""
    for _ in range(1000):
        original = random_string(100).encode('utf-8')
        compressed = lz4_encode_old_kafka(original)
        restored = lz4_decode_old_kafka(compressed)
        assert len(original) == len(restored)
        assert original == restored
def topic(simple_client):
    """Return a randomly named topic after ensuring it exists on the client."""
    name = random_string(5)
    simple_client.ensure_topic_exists(name)
    return name
def topic(kafka_broker, request):
    """Create and return a topic named after the requesting test node.

    The name is ``request.node.name`` joined to a random suffix with an
    underscore; the topic is created through ``kafka_broker.create_topics``.
    """
    node_name = request.node.name
    suffix = random_string(10)
    topic_name = '%s_%s' % (node_name, suffix)
    kafka_broker.create_topics([topic_name])
    return topic_name
def test_lz4():
    """Round-trip 1000 random payloads through ``lz4_encode``/``lz4_decode``."""
    for _ in range(1000):
        original = random_string(100).encode('utf-8')
        compressed = lz4_encode(original)
        restored = lz4_decode(compressed)
        assert len(original) == len(restored)
        assert original == restored
def test_group(kafka_broker, topic):
    """Verify partition assignment across four consumers of a single group.

    Each of four threads builds a ``KafkaConsumer`` with the same group id
    and polls until its stop event is set.  The main thread waits up to 35
    seconds until every consumer has an assignment, all report the same
    generation id and none is rejoining; it then asserts the assignments are
    non-empty, pairwise disjoint, and equal to partitions
    ``0..num_partitions-1`` of ``topic``.
    """
    num_partitions = 4
    connect_str = get_connect_str(kafka_broker)
    consumers = {}
    stop = {}
    threads = {}
    # Nested mapping: consumer index -> topic partition -> list of records.
    # A flat defaultdict(list) cannot be indexed by partition.
    messages = collections.defaultdict(lambda: collections.defaultdict(list))
    group_id = 'test-group-' + random_string(6)

    def consumer_thread(i):
        # Owns consumer i: create it, poll until stopped, then close it.
        assert i not in consumers
        assert i not in stop
        stop[i] = threading.Event()
        consumers[i] = KafkaConsumer(topic,
                                     bootstrap_servers=connect_str,
                                     group_id=group_id,
                                     heartbeat_interval_ms=500)
        while not stop[i].is_set():
            # The poll result is walked as (partition, records) items so the
            # two-name unpacking below is valid.
            for tp, records in six.iteritems(consumers[i].poll(100)):
                messages[i][tp].extend(records)
        consumers[i].close()
        consumers[i] = None
        stop[i] = None

    num_consumers = 4
    for i in range(num_consumers):
        t = threading.Thread(target=consumer_thread, args=(i,))
        t.start()
        threads[i] = t

    try:
        timeout = time.time() + 35
        while True:
            for c in range(num_consumers):

                # Verify all consumers have been created
                if c not in consumers:
                    break

                # Verify all consumers have an assignment
                elif not consumers[c].assignment():
                    break

            # If all consumers exist and have an assignment
            else:

                logging.info('All consumers have assignment... checking for stable group')
                # Verify all consumers are in the same generation
                # then log state and break while loop
                generations = set([consumer._coordinator._generation.generation_id
                                   for consumer in list(consumers.values())])

                # New generation assignment is not complete until
                # coordinator.rejoining = False
                rejoining = any([consumer._coordinator.rejoining
                                 for consumer in list(consumers.values())])

                if not rejoining and len(generations) == 1:
                    for c, consumer in list(consumers.items()):
                        logging.info("[%s] %s %s: %s", c,
                                     consumer._coordinator._generation.generation_id,
                                     consumer._coordinator._generation.member_id,
                                     consumer.assignment())
                    break
                else:
                    logging.info('Rejoining: %s, generations: %s', rejoining, generations)
                    time.sleep(1)
            assert time.time() < timeout, "timeout waiting for assignments"

        logging.info('Group stabilized; verifying assignment')
        group_assignment = set()
        for c in range(num_consumers):
            assert len(consumers[c].assignment()) != 0
            assert set.isdisjoint(consumers[c].assignment(), group_assignment)
            group_assignment.update(consumers[c].assignment())

        assert group_assignment == set([
            TopicPartition(topic, partition)
            for partition in range(num_partitions)])
        logging.info('Assignment looks good!')

    finally:
        logging.info('Shutting down %s consumers', num_consumers)
        for c in range(num_consumers):
            logging.info('Stopping consumer %s', c)
            stop[c].set()
            threads[c].join()
            threads[c] = None